libdas2
das2 core C utilities
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
dataset.h
Go to the documentation of this file.
1 /* Copyright (C) 2017-2018 Chris Piker <chris-piker@uiowa.edu>
2  *
3  * This file is part of libdas2, the Core Das2 C Library.
4  *
5  * Libdas2 is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU Lesser General Public License version 2.1 as published
7  * by the Free Software Foundation.
8  *
9  * Libdas2 is distributed in the hope that it will be useful, but WITHOUT ANY
10  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * version 2.1 along with libdas2; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 
21 #ifndef _das_dataset_h_
22 #define _das_dataset_h_
23 
24 #include <das2/dimension.h>
25 
26 #ifdef __cplusplus
27 extern "C" {
28 #endif
29 
30 /* Old initial comment that kicked off the entire das2 data model design...
31  *
32  * The structures below are the start of an idea on how to get independent
33  * parameters for data at any particular index. These are just thoughts
34  * at the moment and don't affect any working code. There are many ways
35  * to do this. The CDF and QStream assumption is that there are the same
36  * number of parameters locating a data point in parameter space as there
37  * are indices to the dataset. Because of this x,y,z scatter data are
38  * hard to handle.
39  *
40  * For x,y,z scatter lists there is 1 index for any point in the dataset,
41  * but for each index there are 2 independent parameters. Basically QStream
42  * and CDF assume that all datasets are CUBEs in parameter space but this
43  * is not the case for a great many sets.
44  *
45  * To adequately handle these 'path' datasets a parameter map is required.
46  * The mapping takes 1 index value per data rank and returns 1 to N parameter
47  * values.
48  *
49  * These structures start to handle this idea but are just doodles at this
50  * point. -cwp 2017-07-25
51  */
52 
53 /* Second comment that added desire for flexible data types ...
54  *
55  * Thinking about coordinate returns, how about a data set of thefts / month
56  * in 5 American cities... Won't usually come up, but should be possible
57  * to handle. Here's the data set:
58  *
59  * 2016-01 2016-02 2016-03 2016-04 2016-05 2016-06
60  * Baltimore 2351 3789 4625 5525 6135 5902
61  * Bogotá 109065 110365 99625 98265 43850 33892
62  * Chicago 4789 5764 8901 10145 13456 22678
63  * Des Moines 4 10 33 35 44 107
64  *
65  * Properties: Title -> "Thefts/Month for selected cities"
66  *
67  * Okay, the X axis data type is text[12] (need null char)
68  * Y axis data type is datetime
69  * Z axis data type is datum, "thefts month**-1"
70  *
71  * So what is the return value from pDs->bin(pDs, 0, 0) ?
72  *
73  * The bin is defined on the space of all UTC times, and on the space of all
74  * cities in the data set.
75  *
76  *
77  * So, what about this common data set, interference events:
78  *
79  * |<---------- Bin ------>| |<----- Value --->|
80  * 2016-01-01T14:00 2016-01-02T02:20 Mag Roll
81  * 2016-01-01T15:40 2016-01-01T15:41 Stabilization Pulse
82  * 2016-01-01T15:43 2016-01-01T15:44 Stabilization Pulse
83  * 2016-01-01T15:45 2016-01-01T15:47 Stabilization Pulse
84  * 2016-01-01T15:48 2016-01-01T15:50 Stabilization Pulse
85  *
86  * So what is the return value from pDs->bin(pDs, 0) ?
87  *
88  * The space is UTC time, So each bin start and stop is defined on the space
89  * of all UTC times.
90  * -cwp 2017-??-??
91  */
92 
/** Das2 Datasets.
 *
 * Holds the descriptor properties, the dataset and join-group text
 * identifiers, the list of physical dimensions and the list of low-level
 * arrays for one dataset.
 */
133 typedef struct dataset {
134  DasDesc base; /* This would be equivalent to the properties for
135  a packet descriptor. Typically in das 2.2 packets
136  don't have a descriptor, only streams and planes
137  but access to the stream descriptor forwards through
138  here. */
139 
140  int nRank; /* The number of whole-dataset index dimensions.
141  * Variables can define internal dimensions but they
142  * can't use indices in the first nRank positions for
143  * internal use, as these are used to correlate values
144  * across the dataset. */
145 
146  char sId[64]; /* A text identifier for this instance of a data set */
147  char sGroupId[64]; /* A text identifier for the join group for this */
148  /* dataset. Das2 datasets with the same groupID should
149  be joined automatically by display clients. */
150 
151  size_t uDims; /* Number of dimensions, das2 datasets are
152  * implicitly bundles in qdataset terms. */
153 
154  DasDim** lDims; /* The data variable object arrays */
155  size_t uSzDims; /* Current size of variable array */
156 
157  size_t uArrays; /* The number of low-level arrays */
158  DasAry** lArrays; /* An array of array objects */
159  size_t uSzArrays; /* NOTE(review): presumably the current size of lArrays,
                       * mirroring uSzDims for lDims -- TODO confirm */
160 
161 } DasDs;
162 
194  const char* sId, const char* sGroupId, int nRank
195 );
196 
/** Delete a Correlated Data object, cleaning up its memory. */
205 void del_DasDs(DasDs* pThis);
206 
207 
/** Return current valid ranges for whole data set iteration.
 *
 * NOTE(review): pShape is presumably the output buffer for those ranges and
 * the int result the dataset rank -- confirm against the implementation.
 */
251 int DasDs_shape(const DasDs* pThis, ptrdiff_t* pShape);
252 
/** Return the current maximum index value + 1 for any partial index. */
269 ptrdiff_t DasDs_lengthIn(const DasDs* pThis, int nIdx, ptrdiff_t* pLoc);
270 
309 typedef struct dasds_iterator_t{
310 
312  bool done;
313 
316  ptrdiff_t index[DASIDX_MAX];
317 
318  int rank;
319  ptrdiff_t shape[DASIDX_MAX]; /* Used for CUBIC datasets */
320  ptrdiff_t nLenIn; /* Used for ragged datasets */
321  bool ragged;
322  const DasDs* pDs;
324 
/** Initialize a const dataset iterator. */
341 void dasds_iter_init(dasds_iterator* pIter, const DasDs* pDs);
342 
/** Increment the iterator's index by one position, rolling as needed at
 * data boundaries. */
359 bool dasds_iter_next(dasds_iterator* pIter);
360 
361 
362 
/* NOTE(review): presumably returns the dataset's text identifier (sId) --
 * TODO confirm against the implementation. */
369 const char* DasDs_id(const DasDs* pThis);
370 
/** Get the number of whole-dataset index dimensions (the nRank member).
 *
 * The argument and the whole expansion are parenthesized so that pointer
 * expressions such as DasDs_rank(p + 1) or DasDs_rank(&ds) expand correctly.
 *
 * @param P a pointer expression to a structure with an nRank member
 */
#define DasDs_rank(P) ((P)->nRank)
373 
/** Add an array to the dataset, stealing its reference. */
397 bool DasDs_addAry(DasDs* pThis, DasAry* pAry);
398 
/** Make a new dimension within this dataset. */
418 DasDim* DasDs_makeDim(DasDs* pThis, enum dim_type dType, const char* sId);
419 
420 
/** Copy in dataset properties from some other descriptor. */
433 int DasDs_copyInProps(DasDs* pThis, const DasDesc* pOther);
434 
435 
/** Get the data set group id. */
452 const char* DasDs_group(const DasDs* pThis);
453 
/** Get the number of physical dimensions in this dataset. */
461 size_t DasDs_numDims(const DasDs* pThis, enum dim_type vt);
462 
/** Get a dimension by index. */
470 const DasDim* DasDs_getDim(const DasDs* pThis, size_t idx, enum dim_type vt);
471 
/** Get a dimension by string id. */
479 DasDim* DasDs_getDimById(DasDs* pThis, const char* sId);
480 
/** Print a string representation of this dataset. */
487 char* DasDs_toStr(const DasDs* pThis, char* sBuf, int nLen);
488 
489 
490 
491 /* Ideas I'm still working on...
492 
493 
494 / * The two functions below are really useful but I'll need to crack open
495  a double pack of Flex and Bison to get it done so I'm punting for now. * /
496 
497 / * Ex Expression: $spec_dens[i][j][k] * /
498 const Function* Dataset_evalDataExp(Dataset* pThis, const char* sExpression);
499 
500 / * Ex Expression: $craft_alt[i][j] - 0.5 * $delay_time[k] * 299792 * /
501 const Function* Dataset_evalCoordExp(Dataset* pThis, const char* sExpression);
502 
503 
504 / **
505  *
506  * This function answers the question by either providing the spanning set of
507  * coordinates or returning nothing.
508  * For a dataset to be defined
509  * on a coordinate grid there must exist one coordinate set for each index in
510  * the data set and each coordinate must be a function of only one index.
511  *
512  * Non-gridded data can still be sliced but coordinate slices will need to be
513  * produced as well in order to plot the slice. See Dataset_orthogonal()
514  *
515  * @param pThis A correlated dataset object
516  * @param sDs The string id of the dataset in question
517  * @param[out] psCoords a pointer to a const char* array to receive the
518  * coordinate ID's forming the spanning set. Note that every
519  * combination of returned coordinates satisfies the orthogonal
520  * condition and would return true from Dataset_orthogonal().
521  *
522  * @return The number of spanning coordinates. Will be equal to the
523  * rank of the dataset.
524  * /
525 size_t Dataset_gridCoords(
526  const Dataset* pThis, const char* sDs, const char** psCoords
527 );
528 
529 bool DataGen_grid(const DataSet* pDataset);
530 
531 const DataSet** Dg_griddedIn(const DataSet* pDataset);
532 
533 
534 
535 / ** Get the coefficients for iterating over a 1-D slice of a regular (i.e.
536  * non-ragged) dataset.
537  *
538  * This function does not work for ragged datasets and merely returns NULL if
539  * asked for iteration coefficients for such a set. In such a case use
540  * Dataset_copySlice1D().
541  *
542  * /
543 const void* Dataset_slice1D(
544  const Dataset* pThis, const char* sDs, const char* sCoord, int iCoordIdx,
545  int* pCoeff
546 );
547 
548 / ** Increment the reference count on any array objects that are part of
549  * a data space.
550  *
551  * This is useful in instances where the underlying data arrays are going
552  * to be represented by an organizational structure other than datasets
553  * and DataSets since Das array objects only free data memory if their
554  * reference count is zero.
555  *
556  * @param pThis
557  * /
558 void DataSpace_incAryRef(Dataset* pThis);
559 
560 / * Need a way to trigger callbacks from datasets changing, not just
561  packets changing. It could be useful to work on items from the
562  dataset level instead of just the packet level * /
563  bool DataSpace_stream(Dataset* pThis); * /
564 
565 
566 
567 / ** Indicate the physical degrees of freedom for a dataset by denoting a
568  * complete list of coordinate sets.
569  *
570  * A list of coordinates over which an entire dataset is defined is called
571  * a span. Datasets may have 1-N spans.
572  * /
573 int DataSpace_addSpan(const char* sDsId, const char** lCoords, size_t nCoords);
574 */
575 
578 #ifdef __cplusplus
579 }
580 #endif
581 
582 #endif /* _das_dataset_h_ */
Das2 Physical Dimensions.
Definition: dimension.h:121
Das2 Datasets.
Definition: dataset.h:133
DasDim * DasDs_makeDim(DasDs *pThis, enum dim_type dType, const char *sId)
Make a new dimension within this dataset.
bool dasds_iter_next(dasds_iterator *pIter)
Increment the iterator's index by one position, rolling as needed at data boundaries.
void dasds_iter_init(dasds_iterator *pIter, const DasDs *pDs)
Initialize a const dataset iterator.
Base structure for Stream Header Items.
Definition: descriptor.h:80
DasDim * DasDs_getDimById(DasDs *pThis, const char *sId)
Get a dimension by string id.
DasDs * new_DasDs(const char *sId, const char *sGroupId, int nRank)
Create a new dataset object.
Dataset iterator structure.
Definition: dataset.h:309
char * DasDs_toStr(const DasDs *pThis, char *sBuf, int nLen)
Print a string representation of this dataset.
bool DasDs_addAry(DasDs *pThis, DasAry *pAry)
Add an array to the dataset, stealing its reference.
size_t DasDs_numDims(const DasDs *pThis, enum dim_type vt)
Get the number of physical dimensions in this dataset.
bool done
If true the value in index is valid, false otherwise.
Definition: dataset.h:312
int DasDs_copyInProps(DasDs *pThis, const DasDesc *pOther)
Copy in dataset properties from some other descriptor.
const char * DasDs_group(const DasDs *pThis)
Get the data set group id.
const DasDim * DasDs_getDim(const DasDs *pThis, size_t idx, enum dim_type vt)
Get a dimension by index.
int DasDs_shape(const DasDs *pThis, ptrdiff_t *pShape)
Return current valid ranges for whole data set iteration.
void del_DasDs(DasDs *pThis)
Delete a Correlated Data object, cleaning up its memory.
ptrdiff_t DasDs_lengthIn(const DasDs *pThis, int nIdx, ptrdiff_t *pLoc)
Return the current maximum index value + 1 for any partial index.
Dynamic recursive ragged arrays.
Definition: array.h:193