H5ACpublic.h

00001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00002  * Copyright by The HDF Group.                                               *
00003  * Copyright by the Board of Trustees of the University of Illinois.         *
00004  * All rights reserved.                                                      *
00005  *                                                                           *
00006  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
00007  * terms governing use, modification, and redistribution, is contained in    *
00008  * the files COPYING and Copyright.html.  COPYING can be found at the root   *
00009  * of the source code distribution tree; Copyright.html can be found at the  *
00010  * root level of an installed copy of the electronic HDF5 document set and   *
00011  * is linked from the top-level documents page.  It can also be found at     *
00012  * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
00013  * access to either file, you may request a copy from help@hdfgroup.org.     *
00014  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00015 
00016 /*-------------------------------------------------------------------------
00017  *
00018  * Created:             H5ACpublic.h
00019  *                      Jul 10 1997
00020  *                      Robb Matzke <matzke@llnl.gov>
00021  *
00022  * Purpose:             Public include file for cache functions.
00023  *
00024  * Modifications:
00025  *
00026  *-------------------------------------------------------------------------
00027  */
00028 #ifndef _H5ACpublic_H
00029 #define _H5ACpublic_H
00030 
00031 /* Public headers needed by this file */
00032 #include "H5public.h"
00033 #include "H5Cpublic.h"
00034 
00035 #ifdef __cplusplus
00036 extern "C" {
00037 #endif
00038 
00039 #define H5AC__MAX_TRACE_FILE_NAME_LEN   1024    /* max length, in characters, of the trace_file_name path in H5AC_cache_config_t */
00040 
00041 /****************************************************************************
00042  *
00043  * structure H5AC_cache_config_t
00044  *
00045  * H5AC_cache_config_t is a public structure intended for use in public APIs.
00046  * At least in its initial incarnation, it is basically a copy of struct
00047  * H5C_auto_size_ctl_t, minus the report_fcn field, and plus the
00048  * dirty_bytes_threshold field.
00049  *
00050  * The report_fcn field is omitted, as including it would require us to
00051  * make H5C_t structure public.
00052  *
00053  * The dirty_bytes_threshold field does not appear in H5C_auto_size_ctl_t,
00054  * as synchronization between caches on different processes is handled at
00055  * the H5AC level, not at the level of H5C.  Note however that there is
00056  * considerable interaction between this value and the other fields in this
00057  * structure.
00058  *
00059  * Similarly, the open_trace_file, close_trace_file, and trace_file_name
00060  * fields do not appear in H5C_auto_size_ctl_t, as most trace file
00061  * issues are handled at the H5AC level.  The one exception is storage of
00062  * the pointer to the trace file, which is handled by H5C.
00063  *
00064  * The structure is in H5ACpublic.h as we may wish to allow different
00065  * configuration options for metadata and raw data caches.
00066  *
00067  * The fields of the structure are discussed individually below:
00068  *
00069  * version: Integer field containing the version number of this version
00070  *      of the H5AC_cache_config_t structure.  Any instance of
00071  *      H5AC_cache_config_t passed to the cache must have a known
00072  *      version number, or an error will be flagged.
00073  *
00074  * rpt_fcn_enabled: Boolean field used to enable and disable the default
00075  *      reporting function.  This function is invoked every time the
00076  *      automatic cache resize code is run, and reports on its activities.
00077  *
00078  *      This is a debugging function, and should normally be turned off.
00079  *
00080  * open_trace_file: Boolean field indicating whether the trace_file_name
00081  *      field should be used to open a trace file for the cache.
00082  *
00083  *      The trace file is a debugging feature that allows the capture of
00084  *      top level metadata cache requests for purposes of debugging and/or
00085  *      optimization.  This field should normally be set to FALSE, as
00086  *      trace file collection imposes considerable overhead.
00087  *
00088  *      This field should only be set to TRUE when the trace_file_name
00089  *      contains the full path of the desired trace file, and either
00090  *      there is no open trace file on the cache, or the close_trace_file
00091  *      field is also TRUE.
00092  *
00093  * close_trace_file: Boolean field indicating whether the current trace
00094  *      file (if any) should be closed.
00095  *
00096  *      See the above comments on the open_trace_file field.  This field
00097  *      should be set to FALSE unless there is an open trace file on the
00098  *      cache that you wish to close.
00099  *
00100  * trace_file_name: Full path of the trace file to be opened if the
00101  *      open_trace_file field is TRUE.
00102  *
00103  *      In the parallel case, an ASCII representation of the MPI rank of
00104  *      the process will be appended to the file name to yield a unique
00105  *      trace file name for each process.
00106  *
00107  *      The length of the path must not exceed H5AC__MAX_TRACE_FILE_NAME_LEN
00108  *      characters.
00109  *
00110  * evictions_enabled:  Boolean field used to either report the current
00111  *      evictions enabled status of the cache, or to set the cache's
00112  *      evictions enabled status.
00113  *
00114  *      In general, the metadata cache should always be allowed to
00115  *      evict entries.  However, in some cases it is advantageous to
00116  *      disable evictions briefly, and thereby postpone metadata
00117  *      writes.  However, this must be done with care, as the cache
00118  *      can grow quickly.  If you do this, re-enable evictions as
00119  *      soon as possible and monitor cache size.
00120  *
00121  *      At present, evictions can only be disabled if automatic
00122  *      cache resizing is also disabled (that is, ( incr_mode ==
00123  *      H5C_incr__off ) && ( decr_mode == H5C_decr__off )).  There
00124  *      is no logical reason why this should be so, but it simplifies
00125  *      implementation and testing, and I can't think of any reason
00126  *      why it would be desirable.  If you can think of one, I'll
00127  *      revisit the issue.
00128  *
00129  * set_initial_size: Boolean flag indicating whether the initial
00130  *      size of the cache is to be set to the value given in
00131  *      the initial_size field.  If set_initial_size is FALSE, the
00132  *      initial_size field is ignored.
00133  *
00134  * initial_size: If enabled, this field contains the size the cache is
00135  *      to be set to upon receipt of this structure.  Needless to say,
00136  *      initial_size must lie in the closed interval [min_size, max_size].
00137  *
00138  * min_clean_fraction: double in the range 0 to 1 indicating the fraction
00139  *      of the cache that is to be kept clean.  This field is only used
00140  *      in parallel mode.  Typical values are 0.1 to 0.5.
00141  *
00142  * max_size: Maximum size to which the cache can be adjusted.  The
00143  *      supplied value must fall in the closed interval
00144  *      [H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE].  Also, max_size must
00145  *      be greater than or equal to min_size.
00146  *
00147  * min_size: Minimum size to which the cache can be adjusted.  The
00148  *      supplied value must fall in the closed interval
00149  *      [H5C__MIN_MAX_CACHE_SIZE, H5C__MAX_MAX_CACHE_SIZE].  Also, min_size
00150  *      must be less than or equal to max_size.
00151  *
00152  * epoch_length: Number of accesses on the cache over which to collect
00153  *      hit rate stats before running the automatic cache resize code,
00154  *      if it is enabled.
00155  *
00156  *      At the end of an epoch, we discard prior hit rate data and start
00157  *      collecting afresh.  The epoch_length must lie in the closed
00158  *      interval [H5C__MIN_AR_EPOCH_LENGTH, H5C__MAX_AR_EPOCH_LENGTH].
00159  *
00160  *
00161  * Cache size increase control fields:
00162  *
00163  * incr_mode: Instance of the H5C_cache_incr_mode enumerated type whose
00164  *      value indicates how we determine whether the cache size should be
00165  *      increased.  At present there are two possible values:
00166  *
00167  *      H5C_incr__off:  Don't attempt to increase the size of the cache
00168  *              automatically.
00169  *
00170  *              When this increment mode is selected, the remaining fields
00171  *              in the cache size increase section are ignored.
00172  *
00173  *      H5C_incr__threshold: Attempt to increase the size of the cache
00174  *              whenever the average hit rate over the last epoch drops
00175  *              below the value supplied in the lower_hr_threshold
00176  *              field.
00177  *
00178  *              Note that this attempt will fail if the cache is already
00179  *              at its maximum size, or if the cache is not already using
00180  *              all available space.
00181  *
00182  *      Note that you must set incr_mode to H5C_incr__off if you
00183  *      disable metadata cache entry evictions.
00184  *
00185  * lower_hr_threshold: Lower hit rate threshold.  If the increment mode
00186  *      (incr_mode) is H5C_incr__threshold and the hit rate drops below the
00187  *      value supplied in this field in an epoch, increment the cache size by
00188  *      size_increment.  Note that cache size may not be incremented above
00189  *      max_size, and that the increment may be further restricted by the
00190  *      max_increment field if it is enabled.
00191  *
00192  *      When enabled, this field must contain a value in the range [0.0, 1.0].
00193  *      Depending on the incr_mode selected, it may also have to be less than
00194  *      upper_hr_threshold.
00195  *
00196  * increment:  Double containing the multiplier used to derive the new
00197  *      cache size from the old if a cache size increment is triggered.
00198  *      The increment must be greater than 1.0, and should not exceed 2.0.
00199  *
00200  *      The new cache size is obtained by multiplying the current max cache
00201  *      size by the increment, and then clamping to max_size and to stay
00202  *      within the max_increment as necessary.
00203  *
00204  * apply_max_increment:  Boolean flag indicating whether the max_increment
00205  *      field should be used to limit the maximum cache size increment.
00206  *
00207  * max_increment: If enabled by the apply_max_increment field described
00208  *      above, this field contains the maximum number of bytes by which the
00209  *      cache size can be increased in a single re-size.
00210  *
00211  * flash_incr_mode:  Instance of the H5C_cache_flash_incr_mode enumerated
00212  *      type whose value indicates whether and by which algorithm we should
00213  *      make flash increases in the size of the cache to accommodate insertion
00214  *      of large entries and large increases in the size of a single entry.
00215  *
00216  *      The addition of the flash increment mode was occasioned by performance
00217  *      problems that appear when a local heap is increased to a size in excess
00218  *      of the current cache size.  While the existing re-size code dealt with
00219  *      this eventually, performance was very bad for the remainder of the
00220  *      epoch.
00221  *
00222  *      At present, there are two possible values for the flash_incr_mode:
00223  *
00224  *      H5C_flash_incr__off:  Don't perform flash increases in the size of
00225  *              the cache.
00226  *
00227  *      H5C_flash_incr__add_space:  Let x be either the size of a newly
00228  *              inserted entry, or the number of bytes by which the
00229  *              size of an existing entry has been increased.
00230  *
00231  *              If
00232  *                      x > flash_threshold * current max cache size,
00233  *
00234  *              increase the current maximum cache size by x * flash_multiple
00235  *              less any free space in the cache, and start a new epoch.  For
00236  *              now at least, pay no attention to the maximum increment.
00237  *
00238  *      In both of the above cases, the flash increment pays no attention to
00239  *      the maximum increment (at least in this first incarnation), but DOES
00240  *      stay within max_size.
00241  *
00242  *      With a little thought, it should be obvious that the above flash
00243  *      cache size increase algorithm is not sufficient for all circumstances --
00244  *      for example, suppose the user round robins through
00245  *      (1/flash_threshold) +1 groups, adding one data set to each on each
00246  *      pass.  Then all will increase in size at about the same time, requiring
00247  *      the max cache size to at least double to maintain acceptable
00248  *      performance, however the above flash increment algorithm will not be
00249  *      triggered.
00250  *
00251  *      Hopefully, the add space algorithms detailed above will be sufficient
00252  *      for the performance problems encountered to date.  However, we should
00253  *      expect to revisit the issue.
00254  *
00255  * flash_multiple: Double containing the multiple described above in the
00256  *      H5C_flash_incr__add_space section of the discussion of the
00257  *      flash_incr_mode section.  This field is ignored unless flash_incr_mode
00258  *      is H5C_flash_incr__add_space.
00259  *
00260  * flash_threshold: Double containing the factor by which current max cache size
00261  *      is multiplied to obtain the size threshold for the add_space flash
00262  *      increment algorithm.  The field is ignored unless flash_incr_mode is
00263  *      H5C_flash_incr__add_space.
00264  *
00265  *
00266  *
00267  * Cache size decrease control fields:
00268  *
00269  * decr_mode: Instance of the H5C_cache_decr_mode enumerated type whose
00270  *      value indicates how we determine whether the cache size should be
00271  *      decreased.  At present there are four possibilities.
00272  *
00273  *      H5C_decr__off:  Don't attempt to decrease the size of the cache
00274  *              automatically.
00275  *
00276  *              When this decrement mode is selected, the remaining fields
00277  *              in the cache size decrease section are ignored.
00278  *
00279  *      H5C_decr__threshold: Attempt to decrease the size of the cache
00280  *              whenever the average hit rate over the last epoch rises
00281  *              above the value supplied in the upper_hr_threshold
00282  *              field.
00283  *
00284  *      H5C_decr__age_out:  At the end of each epoch, search the cache for
00285  *              entries that have not been accessed for at least the number
00286  *              of epochs specified in the epochs_before_eviction field, and
00287  *              evict these entries.  Conceptually, the maximum cache size
00288  *              is then decreased to match the new actual cache size.  However,
00289  *              this reduction may be modified by the min_size, the
00290  *              max_decrement, and/or the empty_reserve.
00291  *
00292  *      H5C_decr__age_out_with_threshold:  Same as age_out, but we only
00293  *              attempt to reduce the cache size when the hit rate observed
00294  *              over the last epoch exceeds the value provided in the
00295  *              upper_hr_threshold field.
00296  *
00297  *      Note that you must set decr_mode to H5C_decr__off if you
00298  *      disable metadata cache entry evictions.
00299  *
00300  * upper_hr_threshold: Upper hit rate threshold.  The use of this field
00301  *      varies according to the current decr_mode:
00302  *
00303  *      H5C_decr__off or H5C_decr__age_out:  The value of this field is
00304  *              ignored.
00305  *
00306  *      H5C_decr__threshold:  If the hit rate exceeds this threshold in any
00307  *              epoch, attempt to decrement the cache size by size_decrement.
00308  *
00309  *              Note that cache size may not be decremented below min_size.
00310  *
00311  *              Note also that if the upper_hr_threshold is 1.0, the cache size
00312  *              will never be reduced.
00313  *
00314  *      H5C_decr__age_out_with_threshold:  If the hit rate exceeds this
00315  *              threshold in any epoch, attempt to reduce the cache size
00316  *              by evicting entries that have not been accessed for more
00317  *              than the specified number of epochs.
00318  *
00319  * decrement: This field is only used when the decr_mode is
00320  *      H5C_decr__threshold.
00321  *
00322  *      The field is a double containing the multiplier used to derive the
00323  *      new cache size from the old if a cache size decrement is triggered.
00324  *      The decrement must be in the range 0.0 (in which case the cache will
00325  *      try to contract to its minimum size) to 1.0 (in which case the
00326  *      cache will never shrink).
00327  *
00328  * apply_max_decrement:  Boolean flag used to determine whether decrements
00329  *      in cache size are to be limited by the max_decrement field.
00330  *
00331  * max_decrement: Maximum number of bytes by which the cache size can be
00332  *      decreased in a single re-size.  Note that decrements may also be
00333  *      restricted by the min_size of the cache, and (in age out modes) by
00334  *      the empty_reserve field.
00335  *
00336  * epochs_before_eviction:  Integer field used in H5C_decr__age_out and
00337  *      H5C_decr__age_out_with_threshold decrement modes.
00338  *
00339  *      This field contains the number of epochs an entry must remain
00340  *      unaccessed before it is evicted in an attempt to reduce the
00341  *      cache size.  If applicable, this field must lie in the range
00342  *      [1, H5C__MAX_EPOCH_MARKERS].
00343  *
00344  * apply_empty_reserve:  Boolean field controlling whether the empty_reserve
00345  *      field is to be used in computing the new cache size when the
00346  *      decr_mode is H5C_decr__age_out or H5C_decr__age_out_with_threshold.
00347  *
00348  * empty_reserve:  To avoid a constant ratcheting down of cache size by small
00349  *      amounts in the H5C_decr__age_out and H5C_decr__age_out_with_threshold
00350  *      modes, this field allows one to require that any cache size
00351  *      reductions leave the specified fraction of unused space in the cache.
00352  *
00353  *      The value of this field must be in the range [0.0, 1.0].  I would
00354  *      expect typical values to be in the range of 0.01 to 0.1.
00355  *
00356  *
00357  * Parallel Configuration Fields:
00358  *
00359  * In PHDF5, all operations that modify metadata must be executed collectively.
00360  * We used to think that this was enough to ensure consistency across the
00361  * metadata caches, but since we allow processes to read metadata individually,
00362  * the order of dirty entries in the LRU list can vary across processes,
00363  * which can result in inconsistencies between the caches.
00364  *
00365  * To prevent this, only the metadata cache on process 0 is allowed to write
00366  * to file, and then only after synchronizing with the other caches.  After
00367  * it writes entries to file, it sends the base addresses of the now clean
00368  * entries to the other caches, so they can mark these entries clean as well.
00369  *
00370  * The different caches know when to synchronize caches by counting the
00371  * number of bytes of dirty metadata created by the collective operations
00372  * modifying metadata.  Whenever this count exceeds a user specified
00373  * threshold (see below), process 0 flushes down to its minimum clean size,
00374  * and then sends the list of newly cleaned entries to the other caches.
00375  *
00376  * dirty_bytes_threshold:  Threshold of dirty byte creation used to
00377  *      synchronize updates between caches. (See above for outline and
00378  *      motivation.)
00379  *
00380  *      This value MUST be consistent across all processes accessing the
00381  *      file.  This field is ignored unless HDF5 has been compiled for
00382  *      parallel.
00383  *
00384  ****************************************************************************/
00385 
00386 #define H5AC__CURR_CACHE_CONFIG_VERSION 1    /* current H5AC_cache_config_t version number; presumably the value to store in its version field -- confirm against the validation code */
00387 
00388 typedef struct H5AC_cache_config_t    /* public cache configuration; each field is described in detail in the comment block above */
00389 {
00390     /* general configuration fields: */
00391     int                      version;    /* version of this structure; an unknown version is flagged as an error */
00392 
00393     hbool_t                  rpt_fcn_enabled;    /* enable the default resize reporting function (debugging aid, normally off) */
00394 
00395     hbool_t                  open_trace_file;    /* open the trace file named in trace_file_name (normally FALSE) */
00396     hbool_t                  close_trace_file;    /* close the current trace file, if any */
00397     char                     trace_file_name[H5AC__MAX_TRACE_FILE_NAME_LEN + 1];    /* full path of the trace file; the +1 is presumably room for the terminating NUL */
00398 
00399     hbool_t                  evictions_enabled;    /* reports or sets the cache's evictions enabled status */
00400 
00401     hbool_t                  set_initial_size;    /* if FALSE, initial_size is ignored */
00402     size_t                   initial_size;    /* size to set the cache to; must lie in [min_size, max_size] */
00403 
00404     double                   min_clean_fraction;    /* fraction (0 to 1) of the cache to keep clean; parallel mode only */
00405 
00406     size_t                   max_size;    /* largest size the cache can be adjusted to; must be >= min_size */
00407     size_t                   min_size;    /* smallest size the cache can be adjusted to; must be <= max_size */
00408 
00409     long int                 epoch_length;    /* number of cache accesses over which hit rate stats are collected */
00410 
00411 
00412     /* size increase control fields: */
00413     enum H5C_cache_incr_mode incr_mode;    /* how the decision to increase the cache size is made */
00414 
00415     double                   lower_hr_threshold;    /* hit rate below which the cache size is increased; in [0.0, 1.0] */
00416 
00417     double                   increment;    /* multiplier applied to the cache size on an increase; must be > 1.0 */
00418 
00419     hbool_t                  apply_max_increment;    /* whether max_increment limits each increase */
00420     size_t                   max_increment;    /* max bytes added in a single re-size, when enabled */
00421 
00422     enum H5C_cache_flash_incr_mode      flash_incr_mode;    /* whether, and by which algorithm, flash increases are made */
00423     double                              flash_multiple;    /* ignored unless flash_incr_mode is H5C_flash_incr__add_space */
00424     double                              flash_threshold;    /* ignored unless flash_incr_mode is H5C_flash_incr__add_space */
00425 
00426 
00427     /* size decrease control fields: */
00428     enum H5C_cache_decr_mode decr_mode;    /* how the decision to decrease the cache size is made */
00429 
00430     double                   upper_hr_threshold;    /* upper hit rate threshold; its use varies with decr_mode */
00431 
00432     double                   decrement;    /* multiplier applied on a decrease, 0.0 to 1.0; used only in H5C_decr__threshold mode */
00433 
00434     hbool_t                  apply_max_decrement;    /* whether max_decrement limits each decrease */
00435     size_t                   max_decrement;    /* max bytes removed in a single re-size */
00436 
00437     int                      epochs_before_eviction;    /* age out modes only; must lie in [1, H5C__MAX_EPOCH_MARKERS] */
00438 
00439     hbool_t                  apply_empty_reserve;    /* whether empty_reserve is used in the age out modes */
00440     double                   empty_reserve;    /* fraction of unused space a size reduction must leave; in [0.0, 1.0] */
00441 
00442 
00443     /* parallel configuration fields: */
00444     int                      dirty_bytes_threshold;    /* dirty byte count that triggers cache synchronization; must match on all processes; ignored unless built for parallel. NOTE(review): signed int, unlike the size_t byte counts above -- confirm negative values are rejected */
00445 
00446 } H5AC_cache_config_t;
00447 
00448 
00449 #ifdef __cplusplus
00450 }
00451 #endif
00452 #endif