h5tools.h

00001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00002  * Copyright by The HDF Group.                                               *
00003  * Copyright by the Board of Trustees of the University of Illinois.         *
00004  * All rights reserved.                                                      *
00005  *                                                                           *
00006  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
00007  * terms governing use, modification, and redistribution, is contained in    *
00008  * the files COPYING and Copyright.html.  COPYING can be found at the root   *
00009  * of the source code distribution tree; Copyright.html can be found at the  *
00010  * root level of an installed copy of the electronic HDF5 document set and   *
00011  * is linked from the top-level documents page.  It can also be found at     *
00012  * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
00013  * access to either file, you may request a copy from help@hdfgroup.org.     *
00014  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00015 
00016 /*
00017  * Programmer:  Robb Matzke <matzke@llnl.gov>
00018  *              Thursday, July 23, 1998
00019  *
00020  * Purpose:     Support functions for the various tools.
00021  */
00022 #ifndef H5TOOLS_H__
00023 #define H5TOOLS_H__
00024 
00025 #include "hdf5.h"
00026 #include "h5tools_error.h"
00027 
00028 #define ESCAPE_HTML             1       /* value for h5tool_format_t.str_locale that selects HTML-style escaping */
00029 #define OPT(X,S)                ((X) ? (X) : (S))   /* X when it is non-zero/non-NULL, otherwise S; note that X is evaluated twice */
00030 #define OPTIONAL_LINE_BREAK     "\001"  /* Special string embedded in the output to mark an optional line break */
00031 #define START_OF_DATA       0x0001      /* NOTE(review): presumably a bit for the `flags' argument of h5tools_dump_simple_data() -- confirm */
00032 #define END_OF_DATA     0x0002          /* NOTE(review): presumably the companion `flags' bit -- confirm */
00033 
00034 /*
00035  * The output functions need a temporary buffer to hold a piece of the
00036  * dataset while it's being printed. This constant sets the limit on the
00037  * size of that temporary buffer in bytes. For efficiency's sake, choose the
00038  * largest value suitable for your machine (for testing use a small value).
00039  */
00040 #if 1                                   /* change to 0 to build with the small test-sized buffer below */
00041 #define H5TOOLS_BUFSIZE         (1024 * 1024)   /* 1 MiB */
00042 #else
00043 #define H5TOOLS_BUFSIZE         (1024)          /* 1 KiB, for testing */
00044 #endif
00045 
00046 /*
00047  * Maximum size, in bytes, used in a single call to malloc
00048  */
00049 #define H5TOOLS_MALLOCSIZE      (128 * 1024 * 1024)     /* 128 MiB */
00050 
00051 /* printf() conversion for hsize_t: "%" + the H5_PRINTF_LL_WIDTH string (defined outside this header) + "u" */
00052 #define HSIZE_T_FORMAT   "%"H5_PRINTF_LL_WIDTH"u"
00053 
00054 #define H5TOOLS_DUMP_MAX_RANK     H5S_MAX_RANK      /* alias for H5S_MAX_RANK (defined outside this header) */
00055 
00056 /*
00057  * Keyword strings for output - these were duplicated from the h5dump.h
00058  * file in order to support region reference data display
00059  */
00060 #define ATTRIBUTE       "ATTRIBUTE"
00061 #define BLOCK           "BLOCK"
00062 #define SUPER_BLOCK     "SUPER_BLOCK"
00063 #define COMPRESSION     "COMPRESSION"
00064 #define CONCATENATOR    "//"
00065 #define COMPLEX         "COMPLEX"
00066 #define COUNT           "COUNT"
00067 #define CSET            "CSET"
00068 #define CTYPE           "CTYPE"
00069 #define DATA            "DATA"
00070 #define DATASPACE       "DATASPACE"
00071 #define EXTERNAL        "EXTERNAL"
00072 #define FILENO          "FILENO"
00073 #define HARDLINK        "HARDLINK"
00074 #define NLINK           "NLINK"
00075 #define OBJID           "OBJECTID"      /* note: the text is "OBJECTID", not the macro name */
00076 #define OBJNO           "OBJNO"
00077 #define S_SCALAR        "SCALAR"        /* the S_ prefix appears only in the macro name, not in the text */
00078 #define S_SIMPLE        "SIMPLE"
00079 #define S_NULL          "NULL"
00080 #define SOFTLINK        "SOFTLINK"
00081 #define EXTLINK         "EXTERNAL_LINK"
00082 #define UDLINK          "USERDEFINED_LINK"
00083 #define START           "START"
00084 #define STRIDE          "STRIDE"
00085 #define STRSIZE         "STRSIZE"
00086 #define STRPAD          "STRPAD"
00087 #define SUBSET          "SUBSET"
00088 #define FILTERS         "FILTERS"
00089 #define DEFLATE         "COMPRESSION DEFLATE"   /* the filter strings below are two words, category then filter name */
00090 #define DEFLATE_LEVEL   "LEVEL"
00091 #define SHUFFLE         "PREPROCESSING SHUFFLE"
00092 #define FLETCHER32      "CHECKSUM FLETCHER32"
00093 #define SZIP            "COMPRESSION SZIP"
00094 #define NBIT            "COMPRESSION NBIT"
00095 #define SCALEOFFSET            "COMPRESSION SCALEOFFSET"
00096 #define SCALEOFFSET_MINBIT            "MIN BITS"
00097 #define STORAGE_LAYOUT  "STORAGE_LAYOUT"
00098 #define CONTIGUOUS      "CONTIGUOUS"
00099 #define COMPACT         "COMPACT"
00100 #define CHUNKED         "CHUNKED"
00101 #define EXTERNAL_FILE   "EXTERNAL_FILE"
00102 #define FILLVALUE       "FILLVALUE"
00103 #define FILE_CONTENTS   "FILE_CONTENTS"
00104 
00105 #define BEGIN           "{"     /* opening delimiter string */
00106 #define END             "}"     /* closing delimiter string */
00107 
00108 /*
00109  * Dump structure for output: a table of strings in which every member after `name' is one half of a begin/end pair.
00110  * It was duplicated from the h5dump.h file in order to support region reference data display.
00111  */
00112 typedef struct h5tools_dump_header_t {
00113     const char *name;               /* NOTE(review): presumably identifies this set of strings -- confirm */
00114     const char *filebegin;          /* <section>begin / <section>end pairs; NOTE(review): presumably the text emitted */
00115     const char *fileend;            /* at the start / end of the named section -- confirm against the code using them  */
00116     const char *bootblockbegin;
00117     const char *bootblockend;
00118     const char *groupbegin;
00119     const char *groupend;
00120     const char *datasetbegin;
00121     const char *datasetend;
00122     const char *attributebegin;
00123     const char *attributeend;
00124     const char *datatypebegin;
00125     const char *datatypeend;
00126     const char *dataspacebegin;
00127     const char *dataspaceend;
00128     const char *databegin;
00129     const char *dataend;
00130     const char *softlinkbegin;
00131     const char *softlinkend;
00132     const char *extlinkbegin;
00133     const char *extlinkend;
00134     const char *udlinkbegin;
00135     const char *udlinkend;
00136     const char *subsettingbegin;
00137     const char *subsettingend;
00138     const char *startbegin;
00139     const char *startend;
00140     const char *stridebegin;
00141     const char *strideend;
00142     const char *countbegin;
00143     const char *countend;
00144     const char *blockbegin;
00145     const char *blockend;
00146 
00147     const char *fileblockbegin;     /* <section>blockbegin / <section>blockend pairs; NOTE(review): presumably the */
00148     const char *fileblockend;       /* delimiters placed around the body of the named section -- confirm            */
00149     const char *bootblockblockbegin;
00150     const char *bootblockblockend;
00151     const char *groupblockbegin;
00152     const char *groupblockend;
00153     const char *datasetblockbegin;
00154     const char *datasetblockend;
00155     const char *attributeblockbegin;
00156     const char *attributeblockend;
00157     const char *datatypeblockbegin;
00158     const char *datatypeblockend;
00159     const char *dataspaceblockbegin;
00160     const char *dataspaceblockend;
00161     const char *datablockbegin;
00162     const char *datablockend;
00163     const char *softlinkblockbegin;
00164     const char *softlinkblockend;
00165     const char *extlinkblockbegin;
00166     const char *extlinkblockend;
00167     const char *udlinkblockbegin;
00168     const char *udlinkblockend;
00169     const char *strblockbegin;
00170     const char *strblockend;
00171     const char *enumblockbegin;
00172     const char *enumblockend;
00173     const char *structblockbegin;
00174     const char *structblockend;
00175     const char *vlenblockbegin;
00176     const char *vlenblockend;
00177     const char *subsettingblockbegin;
00178     const char *subsettingblockend;
00179     const char *startblockbegin;
00180     const char *startblockend;
00181     const char *strideblockbegin;
00182     const char *strideblockend;
00183     const char *countblockbegin;
00184     const char *countblockend;
00185     const char *blockblockbegin;
00186     const char *blockblockend;
00187 
00188     const char *dataspacedescriptionbegin;  /* NOTE(review): presumably delimiters for the dataspace description -- confirm */
00189     const char *dataspacedescriptionend;
00190     const char *dataspacedimbegin;          /* NOTE(review): presumably delimiters for the dataspace dimension list -- confirm */
00191     const char *dataspacedimend;
00192 
00193 } h5tools_dump_header_t;
00194 
00195 /*
00196  * Information about how to format output.
00197  */
00198 typedef struct h5tool_format_t {
00199     /*
00200      * Fields associated with formatting numeric data.  If a datatype matches
00201      * multiple formats based on its size, then the first applicable format
00202      * from this list is used. However, if `raw' is non-zero then dump all
00203      * data in hexadecimal format without translating from what appears on
00204      * disk.
00205      *
00206      *   raw:        If set then print all data as hexadecimal without
00207      *               performing any conversion from disk.
00208      *
00209      *   fmt_raw:    The printf() format for each byte of raw data. The
00210      *               default is `%02x'.
00211      *
00212      *   fmt_int:    The printf() format to use when rendering data which is
00213      *               typed `int'. The default is `%d'.
00214      *
00215      *   fmt_uint:   The printf() format to use when rendering data which is
00216      *               typed `unsigned'. The default is `%u'.
00217      *
00218      *   fmt_schar:  The printf() format to use when rendering data which is
00219      *               typed `signed char'. The default is `%d'. This format is
00220      *               used only if the `ascii' field is zero.
00221      *
00222      *   fmt_uchar:  The printf() format to use when rendering data which is
00223      *               typed `unsigned char'. The default is `%u'. This format
00224      *               is used only if the `ascii' field is zero.
00225      *
00226      *   fmt_short:  The printf() format to use when rendering data which is
00227      *               typed `short'. The default is `%d'.
00228      *
00229      *   fmt_ushort: The printf() format to use when rendering data which is
00230      *               typed `unsigned short'. The default is `%u'.
00231      *
00232      *   fmt_long:   The printf() format to use when rendering data which is
00233      *               typed `long'. The default is `%ld'.
00234      *
00235      *   fmt_ulong:  The printf() format to use when rendering data which is
00236      *               typed `unsigned long'. The default is `%lu'.
00237      *
00238      *   fmt_llong:  The printf() format to use when rendering data which is
00239      *               typed `long long'. The default depends on what printf()
00240      *               format is available to print this datatype.
00241      *
00242      *   fmt_ullong: The printf() format to use when rendering data which is
00243      *               typed `unsigned long long'. The default depends on what
00244      *               printf() format is available to print this datatype.
00245      *
00246      *   fmt_double: The printf() format to use when rendering data which is
00247      *               typed `double'. The default is `%g'.
00248      *
00249      *   fmt_float:  The printf() format to use when rendering data which is
00250      *               typed `float'. The default is `%g'.
00251      *
00252      *   ascii:      If set then print 1-byte integer values as an ASCII
00253      *               character (no quotes).  If the character is one of the
00254      *               standard C escapes then print the escaped version.  If
00255      *               the character is unprintable then print a 3-digit octal
00256      *               escape.  If `ascii' is zero then 1-byte integers are
00257      *               printed as numeric values.  The default is zero.
00258      *
00259      *   str_locale: Determines how strings are printed. If zero then strings
00260      *               are printed like in C. If set to ESCAPE_HTML then
00261      *               strings are printed using HTML encoding where each
00262      *               character not in the class [a-zA-Z0-9] is substituted
00263      *               with `%XX' where `X' is a hexadecimal digit.
00264      *
00265      *   str_repeat: If set to non-zero then any character value repeated N
00266      *               or more times is printed as 'C'*N
00267      *
00268      * Numeric data is also subject to the formats for individual elements.
00269      */
00270     hbool_t     raw;
00271     const char  *fmt_raw;
00272     const char  *fmt_int;
00273     const char  *fmt_uint;
00274     const char  *fmt_schar;
00275     const char  *fmt_uchar;
00276     const char  *fmt_short;
00277     const char  *fmt_ushort;
00278     const char  *fmt_long;
00279     const char  *fmt_ulong;
00280     const char  *fmt_llong;
00281     const char  *fmt_ullong;
00282     const char  *fmt_double;
00283     const char  *fmt_float;
00284     int         ascii;
00285     int         str_locale;
00286     int         str_repeat;
00287 
00288     /*
00289      * Fields associated with compound array members (the `arr_' members below).
00290      *
00291      *   pre:       A string to print at the beginning of each array. The
00292      *              default value is the left square bracket `['.
00293      *
00294      *   sep:       A string to print between array values.  The default
00295      *              value is a ",\001" ("\001" indicates an optional line
00296      *              break).
00297      *
00298      *   suf:       A string to print at the end of each array.  The default
00299      *              value is a right square bracket `]'.
00300      *
00301      *   linebreak: A boolean value to determine if we want to break the line
00302      *              after each row of an array.
00303      */
00304     const char  *arr_pre;
00305     const char  *arr_sep;
00306     const char  *arr_suf;
00307     int         arr_linebreak;
00308 
00309     /*
00310      * Fields associated with compound data types (the `cmpd_' members below).
00311      *
00312      *   name:      How the name of the struct member is printed in the
00313      *              values. By default the name is not printed, but a
00314      *              reasonable setting might be "%s=" which prints the name
00315      *              followed by an equal sign and then the value.
00316      *
00317      *   sep:       A string that separates one member from another.  The
00318      *              default is ", \001" (the \001 indicates an optional
00319      *              line break to allow structs to span multiple lines of
00320      *              output).
00321      *
00322      *   pre:       A string to print at the beginning of a compound type.
00323      *              The default is a left curly brace.
00324      *
00325      *   suf:       A string to print at the end of each compound type.  The
00326      *              default is a right curly brace.
00327      *
00328      *   end:       A string to print after we reach the last element of
00329      *              each compound type. Prints out before the suf.
00330      */
00331     const char  *cmpd_name;
00332     const char  *cmpd_sep;
00333     const char  *cmpd_pre;
00334     const char  *cmpd_suf;
00335     const char  *cmpd_end;
00336 
00337     /*
00338      * Fields associated with vlen data types (the `vlen_' members below).
00339      *
00340      *   sep:       A string that separates one member from another.  The
00341      *              default is ", \001" (the \001 indicates an optional
00342      *              line break to allow structs to span multiple lines of
00343      *              output).
00344      *
00345      *   pre:       A string to print at the beginning of a vlen type.
00346      *              The default is a left parenthesis.
00347      *
00348      *   suf:       A string to print at the end of each vlen type.  The
00349      *              default is a right parenthesis.
00350      *
00351      *   end:       A string to print after we reach the last element of each vlen type
00352      *              (NOTE(review): the text originally said "compound type"; confirm). Prints out before the suf.
00353      */
00354     const char  *vlen_sep;
00355     const char  *vlen_pre;
00356     const char  *vlen_suf;
00357     const char  *vlen_end;
00358 
00359     /*
00360      * Fields associated with the individual elements (the `elmt_' members below).
00361      *
00362      *   fmt:       A printf(3c) format to use to print the value string
00363      *              after it has been rendered.  The default is "%s".
00364      *
00365      *   suf1:      This string is appended to elements which are followed by
00366      *              another element whether the following element is on the
00367      *              same line or the next line.  The default is a comma.
00368      *
00369      *   suf2:      This string is appended (after `suf1') to elements which
00370      *              are followed on the same line by another element.  The
00371      *              default is a single space.
00372      */
00373     const char  *elmt_fmt;
00374     const char  *elmt_suf1;
00375     const char  *elmt_suf2;
00376 
00377     /*
00378      * Fields associated with the index values printed at the left edge of
00379      * each line of output (the `idx_' members below).
00380      *
00381      *   n_fmt:     Each index value is printed according to this printf(3c)
00382      *              format string which should include a format for a long
00383      *              integer.  The default is "%lu".
00384      *
00385      *   sep:       Each integer in the index list will be separated from the
00386      *              others by this string, which defaults to a comma.
00387      *
00388      *   fmt:       After the index values are formatted individually and
00389      *              separated from one another by some string, the entire
00390      *              resulting string will be formatted according to this
00391      *              printf(3c) format which should include a format for a
00392      *              character string.  The default is "%s".
00393      */
00394     const char  *idx_n_fmt;             /*index number format           */
00395     const char  *idx_sep;               /*separator between numbers     */
00396     const char  *idx_fmt;               /*entire index format           */
00397 
00398     /*
00399      * Fields associated with entire lines (the `line_' members below).
00400      *
00401      *   ncols:     Number of columns per line; defaults to 80.
00402      *
00403      *   per_line:  If this field has a positive value then every Nth element
00404      *              will be printed at the beginning of a line.
00405      *
00406      *   pre:       Each line of output contains an optional prefix area
00407      *              before the data. This area can contain the index for the
00408      *              first datum (represented by `%s') as well as other
00409      *              constant text.  The default value is `%s'.
00410      *
00411      *   1st:       This is the format to print at the beginning of the first
00412      *              line of output. The default value is the current value of
00413      *              `pre' described above.
00414      *
00415      *   cont:      This is the format to print at the beginning of each line
00416      *              which was continued because the line was split onto
00417      *              multiple lines. This often happens with compound
00418      *              data which is longer than one line of output. The default
00419      *              value is the current value of the `pre' field
00420      *              described above.
00421      *
00422      *   suf:       This character string will be appended to each line of
00423      *              output.  It should not contain line feeds.  The default
00424      *              is the empty string.
00425      *
00426      *   sep:       A character string to be printed after every line feed
00427      *              defaulting to the empty string.  It should end with a
00428      *              line feed.
00429      *
00430      *   multi_new: Indicates the algorithm to use when data elements tend to
00431      *              occupy more than one line of output. The possible values
00432      *              are (zero is the default):
00433      *
00434      *              0:  No consideration. Each new element is printed
00435      *                  beginning where the previous element ended.
00436      *
00437      *              1:  Print the current element beginning where the
00438      *                  previous element left off. But if that would result
00439      *                  in the element occupying more than one line and it
00440      *                  would only occupy one line if it started at the
00441      *                  beginning of a line, then it is printed at the
00442      *                  beginning of the next line.
00443      *   NOTE(review): the label `multi_new' is repeated below, but only one `line_multi_new' member exists -- confirm.
00444      *   multi_new: If an element is continued onto additional lines then
00445      *              should the following element begin on the next line? The
00446      *              default is to start the next element on the same line
00447      *              unless it wouldn't fit.
00448      *
00449      *   indent:    A string that shows how far to indent if extra spacing
00450      *              is needed. dumper uses it.
00451      */
00452     int         line_ncols;             /*columns of output             */
00453     size_t      line_per_line;          /*max elements per line         */
00454     const char  *line_pre;              /*prefix at front of each line  */
00455     const char  *line_1st;              /*alternate pre. on first line  */
00456     const char  *line_cont;             /*alternate pre. on continuation*/
00457     const char  *line_suf;              /*string to append to each line */
00458     const char  *line_sep;              /*separates lines               */
00459     int         line_multi_new;         /*split multi-line outputs?     */
00460     const char  *line_indent;           /*for extra indentation if we need it*/
00461 
00462     /*used to skip the first set of checks for line length*/
00463     int skip_first;
00464 
00465     /*flag used to hide or show the file number for obj refs*/
00466     int obj_hidefileno;
00467 
00468     /*string used to format the output for the obj refs*/
00469     const char *obj_format;
00470 
00471     /*flag used to hide or show the file number for dataset regions*/
00472     int dset_hidefileno;
00473 
00474     /*string used to format the output for the dataset regions*/
00475     const char *dset_format;
00476 
00477     const char *dset_blockformat_pre;   /* NOTE(review): presumably a prefix string for region block output -- confirm */
00478     const char *dset_ptformat_pre;      /* NOTE(review): presumably a prefix string for region point output -- confirm */
00479     const char *dset_ptformat;          /* NOTE(review): presumably the format string for region points -- confirm */
00480 
00481     /*print array indices in output matrix */
00482     int pindex;
00483 
00484     /*escape non printable characters */
00485     int do_escape;
00486 
00487 } h5tool_format_t;
00488 
00489 typedef struct h5tools_context_t {           /* output state passed as an in,out `ctx' argument to routines declared in this header */
00490     size_t cur_column;                       /*current column for output */
00491     size_t cur_elmt;                         /*current element/output line */
00492     int  need_prefix;                        /*is line prefix needed? */
00493     unsigned ndims;                          /*dimensionality  */
00494     hsize_t p_min_idx[H5S_MAX_RANK];         /*min selected index */
00495     hsize_t p_max_idx[H5S_MAX_RANK];         /*max selected index */
00496     int  prev_multiline;                     /*was prev datum multiline? */
00497     size_t prev_prefix_len;                  /*length of previous prefix */
00498     int  continuation;                       /*continuation of previous data?*/
00499     hsize_t size_last_dim;                   /*the size of the last dimension,
00500                                               *needed so we can break after each
00501                                               *row */
00502     int  indent_level;                 /*the number of times we need some
00503                                        *extra indentation */
00504     int  default_indent_level;        /*this is used when the indent level gets changed */
00505     hsize_t acc[H5S_MAX_RANK];        /* accumulator position */
00506     hsize_t pos[H5S_MAX_RANK];        /* matrix position */
00507     hsize_t sm_pos;                   /* current stripmine element position */
00508 } h5tools_context_t;
00509 
00510 /* A structure to hold the subsetting particulars for a dataset. No typedef is provided, so it is referred to as `struct subset_t'. */
00511 struct subset_t {
00512     hsize_t *start;     /* NOTE(review): each member is a pointer; presumably to one value per dimension -- confirm */
00513     hsize_t *stride;
00514     hsize_t *count;
00515     hsize_t *block;
00516 };
00517 
00518 /* The following include, h5tools_str.h, must come after the
00519  * above structs are defined. There is a dependency in the following
00520  * include that hasn't been identified yet. */
00521 
00522 #include "h5tools_str.h"
00523 
00524 extern FILE   *rawdatastream;       /* output stream for raw data; FILE must come from an earlier include (no <stdio.h> here) */
00525 extern int     bin_output;          /* binary output (NOTE(review): presumably a non-zero-means-enabled flag -- confirm) */
00526 extern int     bin_form;            /* binary form (NOTE(review): meaning of the values is not visible here -- confirm) */
00527 extern int     region_output;       /* region output (NOTE(review): presumably a non-zero-means-enabled flag -- confirm) */
00528 
00529 /* Strings for output (the H5_TOOLS_ prefix appears only in the macro names, not in the text) */
00530 #define H5_TOOLS_GROUP           "GROUP"
00531 #define H5_TOOLS_DATASET         "DATASET"
00532 #define H5_TOOLS_DATATYPE        "DATATYPE"
00533 
00534 /* Declarations of useful routines (this header only declares them; the definitions live elsewhere) */
00535 extern void     h5tools_init(void);
00536 extern void     h5tools_close(void);
00537 extern hid_t    h5tools_fopen(const char *fname, unsigned flags, hid_t fapl,
00538                     const char *driver, char *drivername, size_t drivername_len);   /* NOTE(review): drivername/drivername_len look like a caller-supplied output buffer -- confirm */
00539 extern int      h5tools_dump_dset(FILE *stream, const h5tool_format_t *info, hid_t dset,
00540                                   hid_t p_typ, struct subset_t *sset, int indentlevel);
00541 extern int      h5tools_dump_mem(FILE *stream, const h5tool_format_t *info, hid_t obj_id,
00542                                  hid_t type, hid_t space, void *mem, int indentlevel);
00543 extern hid_t    h5tools_get_native_type(hid_t type);
00544 extern hid_t    h5tools_get_little_endian_type(hid_t type);
00545 extern hid_t    h5tools_get_big_endian_type(hid_t type);
00546 
00547 /* NOTE(review): `flags' below presumably takes the START_OF_DATA / END_OF_DATA values defined in this header -- confirm */
00548 extern void     h5tools_dump_simple_data(FILE *stream, const h5tool_format_t *info, hid_t container,
00549                          h5tools_context_t *ctx/*in,out*/, unsigned flags,
00550                          hsize_t nelmts, hid_t type, void *_mem);
00551 
00552 extern int      h5tools_canreadf(const char* name,
00553                                  hid_t dcpl_id);
00554 extern int      h5tools_can_encode(H5Z_filter_t filtn);
00555 /* The prototypes from here on omit `extern'; for function declarations in C the linkage is the same either way */
00556 void            init_acc_pos(h5tools_context_t *ctx, hsize_t *dims);
00557 /*
00558  * New functions needed to display region reference data (h5tools_str_t is not defined in this header; presumably it comes from h5tools_str.h)
00559  */
00560 void            h5tools_dump_datatype(FILE *stream, const h5tool_format_t *info,
00561                          h5tools_context_t *ctx/*in,out*/, hid_t type);
00562 int             h5tools_print_dataspace(h5tools_str_t *buffer/*in,out*/, hid_t space);
00563 int             h5tools_print_datatype(h5tools_str_t *buffer/*in,out*/,
00564                          const h5tool_format_t *info, h5tools_context_t *ctx/*in,out*/,
00565                          hid_t type);
00566 int             h5tools_print_enum(h5tools_str_t *buffer/*in,out*/, hid_t type);
00567 #endif /* H5TOOLS_H__ */
00568