H5Cpkg.h

00001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
00002  * Copyright by The HDF Group.                                               *
00003  * Copyright by the Board of Trustees of the University of Illinois.         *
00004  * All rights reserved.                                                      *
00005  *                                                                           *
00006  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
00007  * terms governing use, modification, and redistribution, is contained in    *
00008  * the files COPYING and Copyright.html.  COPYING can be found at the root   *
00009  * of the source code distribution tree; Copyright.html can be found at the  *
00010  * root level of an installed copy of the electronic HDF5 document set and   *
00011  * is linked from the top-level documents page.  It can also be found at     *
00012  * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
00013  * access to either file, you may request a copy from help@hdfgroup.org.     *
00014  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
00015 
00016 /*
00017  * Programmer: John Mainzer -- 10/12/04
00018  *
00019  * Purpose:     This file contains declarations which are normally visible
00020  *              only within the H5C package (just H5C.c at present).
00021  *
00022  *              Source files outside the H5C package should include
00023  *              H5Cprivate.h instead.
00024  *
00025  *              The one exception to this rule is test/cache.c.  The test
00026  *              code is easier to write if it can look at the cache's
00027  *              internal data structures.  Indeed, this is the main
00028  *              reason why this file was created.
00029  */
00030 
00031 #ifndef H5C_PACKAGE
00032 #error "Do not include this file outside the H5HL package!"
00033 #endif
00034 
00035 #ifndef _H5Cpkg_H
00036 #define _H5Cpkg_H
00037 
00038 
00039 /* Get package's private header */
00040 #include "H5Cprivate.h"
00041 
00042 
00043 /* Get needed headers */
00044 #include "H5SLprivate.h"        /* Skip lists */
00045 
00046 /* With the introduction of the fractal heap, it is now possible for
00047  * entries to be dirtied, resized, and/or renamed in the flush callbacks.
00048  * As a result, on flushes, it may be necessary to make multiple passes
00049  * through the slist before it is empty.  H5C__MAX_PASSES_ON_FLUSH sets
00050  * the upper limit on the number of such passes.
00051  * The current value was obtained via personal communication with
00052  * Quincey.  A fudge factor of 2 has been applied to it.
00053  */
00054 
00055 #define H5C__MAX_PASSES_ON_FLUSH        4
00056 
00057 /* Length of the hash table used to index the cache; a power of 2, not a prime, to allow a trivial hash function. */
00058 #define H5C__HASH_TABLE_LEN     (64 * 1024) /* must be a power of 2 */
00059 
00060 
00061 /****************************************************************************
00062  *
00063  * structure H5C_t
00064  *
00065  * Catchall structure for all variables specific to an instance of the cache.
00066  *
00067  * While the individual fields of the structure are discussed below, the
00068  * following overview may be helpful.
00069  *
00070  * Entries in the cache are stored in an instance of H5TB_TREE, indexed on
00071  * the entry's disk address.  While the H5TB_TREE is less efficient than
00072  * a hash table, it keeps the entries in address sorted order.  As flushes
00073  * in parallel mode are more efficient if they are issued in increasing
00074  * address order, this is a significant benefit.  Also the H5TB_TREE code
00075  * was readily available, which reduced development time.
00076  *
00077  * While the cache was designed with multiple replacement policies in mind,
00078  * at present only a modified form of LRU is supported.
00079  *
00080  *                                              JRM - 4/26/04
00081  *
00082  * Profiling has indicated that searches in the instance of H5TB_TREE are
00083  * too expensive.  To deal with this issue, I have augmented the cache
00084  * with a hash table in which all entries will be stored.  Given the
00085  * advantages of flushing entries in increasing address order, the TBBT
00086  * is retained, but only dirty entries are stored in it.  At least for
00087  * now, we will leave entries in the TBBT after they are flushed.
00088  *
00089  * Note that index_size and index_len now refer to the total size of
00090  * and number of entries in the hash table.
00091  *
00092  *                                              JRM - 7/19/04
00093  *
00094  * The TBBT has since been replaced with a skip list.  This change
00095  * greatly predates this note.
00096  *
00097  *                                              JRM - 9/26/05
00098  *
00099  * magic:       Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC.  This
00100  *              field is used to validate pointers to instances of H5C_t.
00101  *
00102  * flush_in_progress: Boolean flag indicating whether a flush is in
00103  *              progress.
00104  *
00105  * trace_file_ptr:  File pointer pointing to the trace file, which is used
00106  *              to record cache operations for use in simulations and design
00107  *              studies.  This field will usually be NULL, indicating that
00108  *              no trace file should be recorded.
00109  *
00110  *              Since much of the code supporting the parallel metadata
00111  *              cache is in H5AC, we don't write the trace file from
00112  *              H5C.  Instead, H5AC reads the trace_file_ptr as needed.
00113  *
00114  *              When we get to using H5C in other places, we may add
00115  *              code to write trace file data at the H5C level as well.
00116  *
00117  * aux_ptr:     Pointer to void used to allow wrapper code to associate
00118  *              its data with an instance of H5C_t.  The H5C cache code
00119  *              sets this field to NULL, and otherwise leaves it alone.
00120  *
00121  * max_type_id: Integer field containing the maximum type id number assigned
00122  *              to a type of entry in the cache.  All type ids from 0 to
00123  *              max_type_id inclusive must be defined.  The names of the
00124  *              types are stored in the type_name_table discussed below, and
00125  *              indexed by the ids.
00126  *
00127  * type_name_table_ptr: Pointer to an array of pointer to char of length
00128  *              max_type_id + 1.  The strings pointed to by the entries
00129  *              in the array are the names of the entry types associated
00130  *              with the indexing type IDs.
00131  *
00132  * max_cache_size:  Nominal maximum number of bytes that may be stored in the
00133  *              cache.  This value should be viewed as a soft limit, as the
00134  *              cache can exceed this value under the following circumstances:
00135  *
00136  *              a) All entries in the cache are protected, and the cache is
00137  *                 asked to insert a new entry.  In this case the new entry
00138  *                 will be created.  If this causes the cache to exceed
00139  *                 max_cache_size, it will do so.  The cache will attempt
00140  *                 to reduce its size as entries are unprotected.
00141  *
00142  *              b) When running in parallel mode, the cache may not be
00143  *                 permitted to flush a dirty entry in response to a read.
00144  *                 If there are no clean entries available to evict, the
00145  *                 cache will exceed its maximum size.  Again the cache
00146  *                 will attempt to reduce its size to the max_cache_size
00147  *                 limit on the next cache write.
00148  *
00149  *              c) When an entry increases in size, the cache may exceed
00150  *                 the max_cache_size limit until the next time the cache
00151  *                 attempts to load or insert an entry.
00152  *
00153  * min_clean_size: Nominal minimum number of clean bytes in the cache.
00154  *              The cache attempts to maintain this number of bytes of
00155  *              clean data so as to avoid case b) above.  Again, this is
00156  *              a soft limit.
00157  *
00158  *
00159  * In addition to the call back functions required for each entry, the
00160  * cache requires the following call back functions for this instance of
00161  * the cache as a whole:
00162  *
00163  * check_write_permitted:  In certain applications, the cache may not
00164  *              be allowed to write to disk at certain times.  If specified,
00165  *              the check_write_permitted function is used to determine if
00166  *              a write is permissible at any given point in time.
00167  *
00168  *              If no such function is specified (i.e. this field is NULL),
00169  *              the cache uses the following write_permitted field to
00170  *              determine whether writes are permitted.
00171  *
00172  * write_permitted: If check_write_permitted is NULL, this boolean flag
00173  *              indicates whether writes are permitted.
00174  *
00175  * log_flush:   If provided, this function is called whenever a dirty
00176  *              entry is flushed to disk.
00177  *
00178  *
00179  * In cases where memory is plentiful, and performance is an issue, it
00180  * is useful to disable all cache evictions, and thereby postpone metadata
00181  * writes.  The following field is used to implement this.
00182  *
00183  * evictions_enabled:  Boolean flag that is initialized to TRUE.  When
00184  *              this flag is set to FALSE, the metadata cache will not
00185  *              attempt to evict entries to make space for newly protected
00186  *              entries, and instead the cache will grow without limit.
00187  *
00188  *              Needless to say, this feature must be used with care.
00189  *
00190  *
00191  * The cache requires an index to facilitate searching for entries.  The
00192  * following fields support that index.
00193  *
00194  * index_len:   Number of entries currently in the hash table used to index
00195  *              the cache.
00196  *
00197  * index_size:  Number of bytes of cache entries currently stored in the
00198  *              hash table used to index the cache.
00199  *
00200  *              This value should not be mistaken for footprint of the
00201  *              cache in memory.  The average cache entry is small, and
00202  *              the cache has a considerable overhead.  Multiplying the
00203  *              index_size by two should yield a conservative estimate
00204  *              of the cache's memory footprint.
00205  *
00206  * clean_index_size: Number of bytes of clean entries currently stored in
00207  *              the hash table.  Note that the index_size field (above)
00208  *              is also the sum of the sizes of all entries in the cache.
00209  *              Thus we should have the invariant that clean_index_size +
00210  *              dirty_index_size == index_size.  
00211  *
00212  *              WARNING:
00213  *
00214  *              1) The clean_index_size field is not maintained by the 
00215  *                 index macros, as the hash table doesn't care whether 
00216  *                 the entry is clean or dirty.  Instead the field is
00217  *                 maintained in the H5C__UPDATE_RP macros.
00218  *
00219  *              2) The value of the clean_index_size must not be mistaken
00220  *                 for the current clean size of the cache.  Rather, the 
00221  *                 clean size of the cache is the current value of 
00222  *                 clean_index_size plus the amount of empty space (if any)
00223  *                 in the cache.
00224  *
00225  * dirty_index_size: Number of bytes of dirty entries currently stored in
00226  *              the hash table.  Note that the index_size field (above)
00227  *              is also the sum of the sizes of all entries in the cache.
00228  *              Thus we should have the invariant that clean_index_size +
00229  *              dirty_index_size == index_size.  
00230  *
00231  *              WARNING:
00232  *
00233  *              1) The dirty_index_size field is not maintained by the 
00234  *                 index macros, as the hash table doesn't care whether 
00235  *                 the entry is clean or dirty.  Instead the field is
00236  *                 maintained in the H5C__UPDATE_RP macros.
00237  *
00238  * index:       Array of pointer to H5C_cache_entry_t of size
00239  *              H5C__HASH_TABLE_LEN.  At present, this value is a power
00240  *              of two, not the usual prime number.
00241  *
00242  *              I hope that the variable size of cache elements, the large
00243  *              hash table size, and the way in which HDF5 allocates space
00244  *              will combine to avoid problems with periodicity.  If so, we
00245  *              can use a trivial hash function (a bit-and and a 3 bit left
00246  *              shift) with some small savings.
00247  *
00248  *              If not, it will become evident in the statistics. Changing
00249  *              to the usual prime number length hash table will require
00250  *              changing the H5C__HASH_FCN macro and the deletion of the
00251  *              H5C__HASH_MASK #define.  No other changes should be required.
00252  *
00253  *
00254  * When we flush the cache, we need to write entries out in increasing
00255  * address order.  An instance of a skip list is used to store dirty entries in
00256  * sorted order.  Whether it is cheaper to sort the dirty entries as needed,
00257  * or to maintain the list is an open question.  At a guess, it depends
00258  * on how frequently the cache is flushed.  We will see how it goes.
00259  *
00260  * For now at least, I will not remove dirty entries from the list as they
00261  * are flushed. (this has been changed -- dirty entries are now removed from
00262  * the skip list as they are flushed.  JRM - 10/25/05)
00263  *
00264  * slist_len:   Number of entries currently in the skip list
00265  *              used to maintain a sorted list of dirty entries in the
00266  *              cache.
00267  *
00268  * slist_size:  Number of bytes of cache entries currently stored in the
00269  *              skip list used to maintain a sorted list of
00270  *              dirty entries in the cache.
00271  *
00272  * slist_ptr:   pointer to the instance of H5SL_t used to maintain a sorted
00273  *              list of dirty entries in the cache.  This sorted list has
00274  *              two uses:
00275  *
00276  *              a) It allows us to flush dirty entries in increasing address
00277  *                 order, which results in significant savings.
00278  *
00279  *              b) It facilitates checking for adjacent dirty entries when
00280  *                 attempting to evict entries from the cache.  While we
00281  *                 don't use this at present, I hope that this will allow
00282  *                 some optimizations when I get to it.
00283  *
00284  * With the addition of the fractal heap, the cache must now deal with
00285  * the case in which entries may be dirtied, renamed, or have their sizes
00286  * changed during a flush.  To allow sanity checks in this situation, the
00287  * following two fields have been added.  They are only compiled in when
00288  * H5C_DO_SANITY_CHECKS is TRUE.
00289  *
00290  * slist_len_increase: Number of entries that have been added to the
00291  *              slist since the last time this field was set to zero.
00292  *
00293  * slist_size_increase: Total size of all entries that have been added
00294  *              to the slist since the last time this field was set to
00295  *              zero.
00296  *
00297  *
00298  * When a cache entry is protected, it must be removed from the LRU
00299  * list(s) as it cannot be either flushed or evicted until it is unprotected.
00300  * The following fields are used to implement the protected list (pl).
00301  *
00302  * pl_len:      Number of entries currently residing on the protected list.
00303  *
00304  * pl_size:     Number of bytes of cache entries currently residing on the
00305  *              protected list.
00306  *
00307  * pl_head_ptr: Pointer to the head of the doubly linked list of protected
00308  *              entries.  Note that cache entries on this list are linked
00309  *              by their next and prev fields.
00310  *
00311  *              This field is NULL if the list is empty.
00312  *
00313  * pl_tail_ptr: Pointer to the tail of the doubly linked list of protected
00314  *              entries.  Note that cache entries on this list are linked
00315  *              by their next and prev fields.
00316  *
00317  *              This field is NULL if the list is empty.
00318  *
00319  *
00320  * For very frequently used entries, the protect/unprotect overhead can
00321  * become burdensome.  To avoid this overhead, I have modified the cache
00322  * to allow entries to be "pinned".  A pinned entry is similar to a
00323  * protected entry, in the sense that it cannot be evicted, and that
00324  * the entry can be modified at any time.
00325  *
00326  * Pinning an entry has the following implications:
00327  *
00328  *      1) A pinned entry cannot be evicted.  Thus unprotected
00329  *         pinned entries reside in the pinned entry list, instead
00330  *         of the LRU list(s) (or other lists maintained by the current
00331  *         replacement policy code).
00332  *
00333  *      2) A pinned entry can be accessed or modified at any time.
00334  *         Therefore, the cache must check with the entry owner
00335  *         before flushing it.  If permission is denied, the
00336  *         cache just skips the entry in the flush.
00337  *
00338  *      3) A pinned entry can be marked as dirty (and possibly
00339  *         change size) while it is unprotected.
00340  *
00341  *      4) The flush-destroy code must allow pinned entries to
00342  *         be unpinned (and possibly unprotected) during the
00343  *         flush.
00344  *
00345  * Since pinned entries cannot be evicted, they must be kept on a pinned
00346  * entry list, instead of being entrusted to the replacement policy code.
00347  *
00348  * Maintaining the pinned entry list requires the following fields:
00349  *
00350  * pel_len:     Number of entries currently residing on the pinned
00351  *              entry list.
00352  *
00353  * pel_size:    Number of bytes of cache entries currently residing on
00354  *              the pinned entry list.
00355  *
00356  * pel_head_ptr: Pointer to the head of the doubly linked list of pinned
00357  *              but not protected entries.  Note that cache entries on
00358  *              this list are linked by their next and prev fields.
00359  *
00360  *              This field is NULL if the list is empty.
00361  *
00362  * pel_tail_ptr: Pointer to the tail of the doubly linked list of pinned
00363  *              but not protected entries.  Note that cache entries on
00364  *              this list are linked by their next and prev fields.
00365  *
00366  *              This field is NULL if the list is empty.
00367  *
00368  *
00369  * The cache must have a replacement policy, and the fields supporting this
00370  * policy must be accessible from this structure.
00371  *
00372  * While there has been interest in several replacement policies for
00373  * this cache, the initial development schedule is tight.  Thus I have
00374  * elected to support only a modified LRU policy for the first cut.
00375  *
00376  * To further simplify matters, I have simply included the fields needed
00377  * by the modified LRU in this structure.  When and if we add support for
00378  * other policies, it will probably be easiest to just add the necessary
00379  * fields to this structure as well -- we only create one instance of this
00380  * structure per file, so the overhead is not excessive.
00381  *
00382  *
00383  * Fields supporting the modified LRU policy:
00384  *
00385  * See most any OS text for a discussion of the LRU replacement policy.
00386  *
00387  * When operating in parallel mode, we must ensure that a read does not
00388  * cause a write.  If it does, the process will hang, as the write will
00389  * be collective and the other processes will not know to participate.
00390  *
00391  * To deal with this issue, I have modified the usual LRU policy by adding
00392  * clean and dirty LRU lists to the usual LRU list.
00393  *
00394  * The clean LRU list is simply the regular LRU list with all dirty cache
00395  * entries removed.
00396  *
00397  * Similarly, the dirty LRU list is the regular LRU list with all the clean
00398  * cache entries removed.
00399  *
00400  * When reading in parallel mode, we evict from the clean LRU list only.
00401  * This implies that we must try to ensure that the clean LRU list is
00402  * reasonably well stocked at all times.
00403  *
00404  * We attempt to do this by trying to flush enough entries on each write
00405  * to keep the cLRU_list_size >= min_clean_size.
00406  *
00407  * Even if we start with a completely clean cache, a sequence of protects
00408  * without unprotects can empty the clean LRU list.  In this case, the
00409  * cache must grow temporarily.  At the next write, we will attempt to
00410  * evict enough entries to reduce index_size to less than max_cache_size.
00411  * While this will usually be possible, all bets are off if enough entries
00412  * are protected.
00413  *
00414  * Discussions of the individual fields used by the modified LRU replacement
00415  * policy follow:
00416  *
00417  * LRU_list_len:  Number of cache entries currently on the LRU list.
00418  *
00419  *              Observe that LRU_list_len + pl_len + pel_len must always
00420  *              equal index_len.
00421  *
00422  * LRU_list_size:  Number of bytes of cache entries currently residing on the
00423  *              LRU list.
00424  *
00425  *              Observe that LRU_list_size + pl_size + pel_size must always
00426  *              equal index_size.
00427  *
00428  * LRU_head_ptr:  Pointer to the head of the doubly linked LRU list.  Cache
00429  *              entries on this list are linked by their next and prev fields.
00430  *
00431  *              This field is NULL if the list is empty.
00432  *
00433  * LRU_tail_ptr:  Pointer to the tail of the doubly linked LRU list.  Cache
00434  *              entries on this list are linked by their next and prev fields.
00435  *
00436  *              This field is NULL if the list is empty.
00437  *
00438  * cLRU_list_len: Number of cache entries currently on the clean LRU list.
00439  *
00440  *              Observe that cLRU_list_len + dLRU_list_len must always
00441  *              equal LRU_list_len.
00442  *
00443  * cLRU_list_size:  Number of bytes of cache entries currently residing on
00444  *              the clean LRU list.
00445  *
00446  *              Observe that cLRU_list_size + dLRU_list_size must always
00447  *              equal LRU_list_size.
00448  *
00449  * cLRU_head_ptr:  Pointer to the head of the doubly linked clean LRU list.
00450  *              Cache entries on this list are linked by their aux_next and
00451  *              aux_prev fields.
00452  *
00453  *              This field is NULL if the list is empty.
00454  *
00455  * cLRU_tail_ptr:  Pointer to the tail of the doubly linked clean LRU list.
00456  *              Cache entries on this list are linked by their aux_next and
00457  *              aux_prev fields.
00458  *
00459  *              This field is NULL if the list is empty.
00460  *
00461  * dLRU_list_len: Number of cache entries currently on the dirty LRU list.
00462  *
00463  *              Observe that cLRU_list_len + dLRU_list_len must always
00464  *              equal LRU_list_len.
00465  *
00466  * dLRU_list_size:  Number of bytes of cache entries on the dirty LRU list.
00467  *
00468  *              Observe that cLRU_list_size + dLRU_list_size must always
00469  *              equal LRU_list_size.
00470  *
00471  * dLRU_head_ptr:  Pointer to the head of the doubly linked dirty LRU list.
00472  *              Cache entries on this list are linked by their aux_next and
00473  *              aux_prev fields.
00474  *
00475  *              This field is NULL if the list is empty.
00476  *
00477  * dLRU_tail_ptr:  Pointer to the tail of the doubly linked dirty LRU list.
00478  *              Cache entries on this list are linked by their aux_next and
00479  *              aux_prev fields.
00480  *
00481  *              This field is NULL if the list is empty.
00482  *
00483  *
00484  * Automatic cache size adjustment:
00485  *
00486  * While the default cache size is adequate for most cases, we can run into
00487  * cases where the default is too small.  Ideally, we will let the user
00488  * adjust the cache size as required.  However, this is not possible in all
00489  * cases.  Thus I have added automatic cache size adjustment code.
00490  *
00491  * The configuration for the automatic cache size adjustment is stored in
00492  * the structure described below:
00493  *
00494  * size_increase_possible:  Depending on the configuration data given
00495  *              in the resize_ctl field, it may or may not be possible
00496  *              to increase the size of the cache.  Rather than test for
00497  *              all the ways this can happen, we simply set this flag when
00498  *              we receive a new configuration.
00499  *
00500  * flash_size_increase_possible: Depending on the configuration data given
00501  *              in the resize_ctl field, it may or may not be possible
00502  *              for a flash size increase to occur.  We set this flag
00503  *              whenever we receive a new configuration so as to avoid
00504  *              repeated calculations.
00505  *
00506  * flash_size_increase_threshold: If a flash cache size increase is possible,
00507  *              this field is used to store the minimum size of a new entry
00508  *              or size increase needed to trigger a flash cache size
00509  *              increase.  Note that this field must be updated whenever
00510  *              the size of the cache is changed.
00511  *
00512  * size_decrease_possible:  Depending on the configuration data given
00513  *              in the resize_ctl field, it may or may not be possible
00514  *              to decrease the size of the cache.  Rather than test for
00515  *              all the ways this can happen, we simply set this flag when
00516  *              we receive a new configuration.
00517  *
00518  * cache_full:  Boolean flag used to keep track of whether the cache is
00519  *              full, so we can refrain from increasing the size of a
00520  *              cache which hasn't used up the space allotted to it.
00521  *
00522  *              The field is initialized to FALSE, and then set to TRUE
00523  *              whenever we attempt to make space in the cache.
00524  *
00525  * resize_enabled:  This is another convenience flag which is set whenever
00526  *              a new set of values for resize_ctl are provided.  Very
00527  *              simply,
00528  *
00529  *                  resize_enabled = size_increase_possible ||
00530  *                                   size_decrease_possible;
00531  *
00532  * size_decreased:  Boolean flag set to TRUE whenever the maximum cache
00533  *              size is decreased.  The flag triggers a call to
00534  *              H5C_make_space_in_cache() on the next call to H5C_protect().
00535  *
00536  * resize_ctl:  Instance of H5C_auto_size_ctl_t containing configuration
00537  *              data for automatic cache resizing.
00538  *
00539  * epoch_markers_active:  Integer field containing the number of epoch
00540  *              markers currently in use in the LRU list.  This value
00541  *              must be in the range [0, H5C__MAX_EPOCH_MARKERS - 1].
00542  *
00543  * epoch_marker_active:  Array of boolean of length H5C__MAX_EPOCH_MARKERS.
00544  *              This array is used to track which epoch markers are currently
00545  *              in use.
00546  *
00547  * epoch_marker_ringbuf:  Array of int of length H5C__MAX_EPOCH_MARKERS + 1.
00548  *
00549  *              To manage the epoch marker cache entries, it is necessary
00550  *              to track their order in the LRU list.  This is done with
00551  *              epoch_marker_ringbuf.  When markers are inserted at the
00552  *              head of the LRU list, the index of the marker in the
00553  *              epoch_markers array is inserted at the tail of the ring
00554  *              buffer.  When it becomes the epoch_markers_active'th marker
00555  *              in the LRU list, it will have worked its way to the head
00556  *              of the ring buffer as well.  This allows us to remove it
00557  *              without scanning the LRU list if such is required.
00558  *
00559  * epoch_marker_ringbuf_first: Integer field containing the index of the
00560  *              first entry in the ring buffer.
00561  *
00562  * epoch_marker_ringbuf_last: Integer field containing the index of the
00563  *              last entry in the ring buffer.
00564  *
00565  * epoch_marker_ringbuf_size: Integer field containing the number of entries
00566  *              in the ring buffer.
00567  *
00568  * epoch_markers:  Array of instances of H5C_cache_entry_t of length
00569  *              H5C__MAX_EPOCH_MARKERS.  The entries are used as markers
00570  *              in the LRU list to identify cache entries that haven't
00571  *              been accessed for some (small) specified number of
00572  *              epochs.  These entries (if any) can then be evicted and
00573  *              the cache size reduced -- ideally without evicting any
00574  *              of the current working set.  Needless to say, the epoch
00575  *              length and the number of epochs before an unused entry
00576  *              must be chosen so that all, or almost all, the working
00577  *              set will be accessed before the limit.
00578  *
00579  *              Epoch markers only appear in the LRU list, never in
00580  *              the index or slist.  While they are of type
00581  *              H5C__EPOCH_MARKER_TYPE, and have associated class
00582  *              functions, these functions should never be called.
00583  *
00584  *              The addr fields of these instances of H5C_cache_entry_t
00585  *              are set to the index of the instance in the epoch_markers
00586  *              array, the size is set to 0, and the type field points
00587  *              to the constant structure epoch_marker_class defined
00588  *              in H5C.c.  The next and prev fields are used as usual
00589  *              to link the entry into the LRU list.
00590  *
00591  *              All other fields are unused.
00592  *
00593  *
00594  * Cache hit rate collection fields:
00595  *
00596  * We supply the current cache hit rate on request, so we must keep a
00597  * simple cache hit rate computation regardless of whether statistics
00598  * collection is enabled.  The following fields support this capability.
00599  *
00600  * cache_hits: Number of cache hits since the last time the cache hit
00601  *      rate statistics were reset.  Note that when automatic cache
00602  *      re-sizing is enabled, this field will be reset every automatic
00603  *      resize epoch.
00604  *
00605  * cache_accesses: Number of times the cache has been accessed
00606  *      since the last time the cache hit rate statistics
00607  *      were reset.  Note that when automatic cache re-sizing is enabled,
00608  *      this field will be reset every automatic resize epoch.
00609  *
00610  *
00611  * Statistics collection fields:
00612  *
00613  * When enabled, these fields are used to collect statistics as described
00614  * below.  The first set are collected only when H5C_COLLECT_CACHE_STATS
00615  * is true.
00616  *
00617  * hits:        Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00618  *              are used to record the number of times an entry with type id
00619  *              equal to the array index has been in cache when requested in
00620  *              the current epoch.
00621  *
00622  * misses:      Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00623  *              are used to record the number of times an entry with type id
00624  *              equal to the array index has not been in cache when
00625  *              requested in the current epoch.
00626  *
00627  * write_protects:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
00628  *              cells are used to record the number of times an entry with
00629  *              type id equal to the array index has been write protected
00630  *              in the current epoch.
00631  *
00632  *              Observe that (hits + misses) = (write_protects + read_protects).
00633  *
00634  * read_protects: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00635  *              are used to record the number of times an entry with type id
00636  *              equal to the array index has been read protected in the
00637  *              current epoch.
00638  *
00639  *              Observe that (hits + misses) = (write_protects + read_protects).
00640  *
00641  * max_read_protects:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.
00642  *              The cells are used to record the maximum number of
00643  *              simultaneous read protects on any entry with type id
00644  *              equal to the array index in the current epoch.
00645  *
00646  * insertions:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00647  *              are used to record the number of times an entry with type
00648  *              id equal to the array index has been inserted into the
00649  *              cache in the current epoch.
00650  *
00651  * pinned_insertions:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
00652  *              The cells are used to record the number of times an entry
00653  *              with type id equal to the array index has been inserted
00654  *              pinned into the cache in the current epoch.
00655  *
00656  * clears:      Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00657  *              are used to record the number of times an entry with type
00658  *              id equal to the array index has been cleared in the current
00659  *              epoch.
00660  *
00661  * flushes:     Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00662  *              are used to record the number of times an entry with type id
00663  *              equal to the array index has been written to disk in the
00664  *              current epoch.
00665  *
00666  * evictions:   Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00667  *              are used to record the number of times an entry with type id
00668  *              equal to the array index has been evicted from the cache in
00669  *              the current epoch.
00670  *
00671  * renames:     Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00672  *              are used to record the number of times an entry with type
00673  *              id equal to the array index has been renamed in the current
00674  *              epoch.
00675  *
00676  * entry_flush_renames: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
00677  *              The cells are used to record the number of times an entry
00678  *              with type id equal to the array index has been renamed
00679  *              during its flush callback in the current epoch.
00680  *
00681  * cache_flush_renames: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
00682  *              The cells are used to record the number of times an entry
00683  *              with type id equal to the array index has been renamed
00684  *              during a cache flush in the current epoch.
00685  *
00686  * pins:        Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00687  *              are used to record the number of times an entry with type
00688  *              id equal to the array index has been pinned in the current
00689  *              epoch.
00690  *
00691  * unpins:      Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00692  *              are used to record the number of times an entry with type
00693  *              id equal to the array index has been unpinned in the current
00694  *              epoch.
00695  *
00696  * dirty_pins:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00697  *              are used to record the number of times an entry with type
00698  *              id equal to the array index has been marked dirty while pinned
00699  *              in the current epoch.
00700  *
00701  * pinned_flushes:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
00702  *              cells are used to record the number of times an entry
00703  *              with type id equal to the array index has been flushed while
00704  *              pinned in the current epoch.
00705  *
00706  * pinned_clears:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
00707  *              cells are used to record the number of times an entry
00708  *              with type id equal to the array index has been cleared while
00709  *              pinned in the current epoch.
00710  *
00711  * size_increases:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
00712  *              The cells are used to record the number of times an entry
00713  *              with type id equal to the array index has increased in
00714  *              size in the current epoch.
00715  *
00716  * size_decreases:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
00717  *              The cells are used to record the number of times an entry
00718  *              with type id equal to the array index has decreased in
00719  *              size in the current epoch.
00720  *
00721  * entry_flush_size_changes:  Array of int64 of length
00722  *              H5C__MAX_NUM_TYPE_IDS + 1.  The cells are used to record
00723  *              the number of times an entry with type id equal to the
00724  *              array index has changed size while in its flush callback.
00725  *
00726  * cache_flush_size_changes:  Array of int64 of length
00727  *              H5C__MAX_NUM_TYPE_IDS + 1.  The cells are used to record
00728  *              the number of times an entry with type id equal to the
00729  *              array index has changed size during a cache flush
00730  *
00731  * total_ht_insertions: Number of times entries have been inserted into the
00732  *              hash table in the current epoch.
00733  *
00734  * total_ht_deletions: Number of times entries have been deleted from the
00735  *              hash table in the current epoch.
00736  *
00737  * successful_ht_searches: int64 containing the total number of successful
00738  *              searches of the hash table in the current epoch.
00739  *
00740  * total_successful_ht_search_depth: int64 containing the total number of
00741  *              entries other than the targets examined in successful
00742  *              searches of the hash table in the current epoch.
00743  *
00744  * failed_ht_searches: int64 containing the total number of unsuccessful
00745  *              searches of the hash table in the current epoch.
00746  *
00747  * total_failed_ht_search_depth: int64 containing the total number of
00748  *              entries examined in unsuccessful searches of the hash
00749  *              table in the current epoch.
00750  *
00751  * max_index_len:  Largest value attained by the index_len field in the
00752  *              current epoch.
00753  *
00754  * max_index_size:  Largest value attained by the index_size field in the
00755  *              current epoch.
00756  *
00757  * max_clean_index_size: Largest value attained by the clean_index_size field
00758  *              in the current epoch.
00759  *
00760  * max_dirty_index_size: Largest value attained by the dirty_index_size field
00761  *              in the current epoch.
00762  *
00763  * max_slist_len:  Largest value attained by the slist_len field in the
00764  *              current epoch.
00765  *
00766  * max_slist_size:  Largest value attained by the slist_size field in the
00767  *              current epoch.
00768  *
00769  * max_pl_len:  Largest value attained by the pl_len field in the
00770  *              current epoch.
00771  *
00772  * max_pl_size: Largest value attained by the pl_size field in the
00773  *              current epoch.
00774  *
00775  * max_pel_len: Largest value attained by the pel_len field in the
00776  *              current epoch.
00777  *
00778  * max_pel_size: Largest value attained by the pel_size field in the
00779  *              current epoch.
00780  *
00781  * calls_to_msic: Total number of calls to H5C_make_space_in_cache
00782  *
00783  * total_entries_skipped_in_msic: Number of clean entries skipped while
00784  *              enforcing the min_clean_fraction in H5C_make_space_in_cache().
00785  *
00786  * total_entries_scanned_in_msic: Total number of entries scanned over
00787  *              in calls to H5C_make_space_in_cache().
00788  *
00789  * max_entries_skipped_in_msic: Maximum number of clean entries skipped
00790  *              in any one call to H5C_make_space_in_cache().
00791  *
00792  * max_entries_scanned_in_msic: Maximum number of entries scanned over
00793  *              in any one call to H5C_make_space_in_cache().
00794  *
00795  * entries_scanned_to_make_space: Number of entries scanned only when looking
00796  *              for entries to evict in order to make space in cache.
00797  *
00798  * The remaining stats are collected only when both H5C_COLLECT_CACHE_STATS
00799  * and H5C_COLLECT_CACHE_ENTRY_STATS are true.
00800  *
00801  * max_accesses: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00802  *              are used to record the maximum number of times any single
00803  *              entry with type id equal to the array index has been
00804  *              accessed in the current epoch.
00805  *
00806  * min_accesses: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00807  *              are used to record the minimum number of times any single
00808  *              entry with type id equal to the array index has been
00809  *              accessed in the current epoch.
00810  *
00811  * max_clears:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00812  *              are used to record the maximum number of times any single
00813  *              entry with type id equal to the array index has been cleared
00814  *              in the current epoch.
00815  *
00816  * max_flushes: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00817  *              are used to record the maximum number of times any single
00818  *              entry with type id equal to the array index has been
00819  *              flushed in the current epoch.
00820  *
00821  * max_size:    Array of size_t of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00822  *              are used to record the maximum size of any single entry
00823  *              with type id equal to the array index that has resided in
00824  *              the cache in the current epoch.
00825  *
00826  * max_pins:    Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
00827  *              are used to record the maximum number of times that any single
00828  *              entry with type id equal to the array index has been
00829  *              marked as pinned in the cache in the current epoch.
00830  *
00831  *
00832  * Fields supporting testing:
00833  *
00834  * For test purposes, it is useful to turn off some asserts and sanity
00835  * checks.  The following flags support this.
00836  *
00837  * skip_file_checks:  Boolean flag used to skip sanity checks on file
00838  *              parameters passed to the cache.  In the test bed, there
00839  *              is no reason to have a file open, as the cache proper
00840  *              just passes these parameters through without using them.
00841  *
00842  *              When this flag is set, all sanity checks on the file
00843  *              parameters are skipped.  The field defaults to FALSE.
00844  *
00845  * skip_dxpl_id_checks:  Boolean flag used to skip sanity checks on the
00846  *              dxpl_id parameters passed to the cache.  These are not
00847  *              used directly by the cache, so skipping the checks
00848  *              simplifies the test bed.
00849  *
00850  *              When this flag is set, all sanity checks on the dxpl_id
00851  *              parameters are skipped.  The field defaults to FALSE.
00852  *
00853  * prefix:      Array of char used to prefix debugging output.  The
00854  *              field is intended to allow marking of output with
00855  *              the process's MPI rank.
00856  *
00857  ****************************************************************************/
00858 
00859 #define H5C__H5C_T_MAGIC        0x005CAC0E  /* presumably the expected value of H5C_t.magic -- TODO confirm in H5C.c */
00860 #define H5C__MAX_NUM_TYPE_IDS   16          /* per-type stats arrays in H5C_t have this many cells plus one */
00861 #define H5C__PREFIX_LEN         32          /* number of chars in the H5C_t.prefix array */
00862 
00863 struct H5C_t
00864 {   /* Cache instance structure; the individual fields are described in the block comment preceding this definition. */
00865     uint32_t                    magic;
00866 
00867     hbool_t                     flush_in_progress;
00868 
00869     FILE *                      trace_file_ptr;
00870 
00871     void *                      aux_ptr;
00872     /* type_name_table_ptr is a pointer to pointer to const char; presumably a table of type names indexed by type id -- TODO confirm */
00873     int32_t                     max_type_id;
00874     const char *                (* type_name_table_ptr);
00875     /* Configured size values; see the header comment for their exact semantics. */
00876     size_t                      max_cache_size;
00877     size_t                      min_clean_size;
00878 
00879     H5C_write_permitted_func_t  check_write_permitted;
00880     hbool_t                     write_permitted;
00881 
00882     H5C_log_flush_func_t        log_flush;
00883 
00884     hbool_t                     evictions_enabled;
00885     /* Index (hash table): entry count, total/clean/dirty sizes, and an array of H5C__HASH_TABLE_LEN entry pointers. */
00886     int32_t                     index_len;
00887     size_t                      index_size;
00888     size_t                      clean_index_size;
00889     size_t                      dirty_index_size;
00890     H5C_cache_entry_t *         (index[H5C__HASH_TABLE_LEN]);
00891 
00892     /* slist: length, size, and pointer to the underlying H5SL_t.  The *_increase fields exist only when H5C_DO_SANITY_CHECKS is true. */
00893     int32_t                     slist_len;
00894     size_t                      slist_size;
00895     H5SL_t *                    slist_ptr;
00896 #if H5C_DO_SANITY_CHECKS
00897     int64_t                     slist_len_increase;
00898     int64_t                     slist_size_increase;
00899 #endif /* H5C_DO_SANITY_CHECKS */
00900     /* pl_*: length, size, and head/tail entry pointers (presumably the protected list -- TODO confirm). */
00901     int32_t                     pl_len;
00902     size_t                      pl_size;
00903     H5C_cache_entry_t *         pl_head_ptr;
00904     H5C_cache_entry_t *         pl_tail_ptr;
00905     /* pel_*: length, size, and head/tail entry pointers (presumably the pinned entry list -- TODO confirm). */
00906     int32_t                     pel_len;
00907     size_t                      pel_size;
00908     H5C_cache_entry_t *         pel_head_ptr;
00909     H5C_cache_entry_t *         pel_tail_ptr;
00910     /* LRU list: length, size, and head/tail entry pointers.  Epoch markers are linked into this list. */
00911     int32_t                     LRU_list_len;
00912     size_t                      LRU_list_size;
00913     H5C_cache_entry_t *         LRU_head_ptr;
00914     H5C_cache_entry_t *         LRU_tail_ptr;
00915     /* cLRU list: same layout as the LRU list fields (presumably clean entries only -- TODO confirm). */
00916     int32_t                     cLRU_list_len;
00917     size_t                      cLRU_list_size;
00918     H5C_cache_entry_t *         cLRU_head_ptr;
00919     H5C_cache_entry_t *         cLRU_tail_ptr;
00920     /* dLRU list: same layout as the LRU list fields (presumably dirty entries only -- TODO confirm). */
00921     int32_t                     dLRU_list_len;
00922     size_t                      dLRU_list_size;
00923     H5C_cache_entry_t *         dLRU_head_ptr;
00924     H5C_cache_entry_t *         dLRU_tail_ptr;
00925     /* Cache resize state flags and the resize configuration (resize_ctl). */
00926     hbool_t                     size_increase_possible;
00927     hbool_t                     flash_size_increase_possible;
00928     size_t                      flash_size_increase_threshold;
00929     hbool_t                     size_decrease_possible;
00930     hbool_t                     resize_enabled;
00931     hbool_t                     cache_full;
00932     hbool_t                     size_decreased;
00933     H5C_auto_size_ctl_t         resize_ctl;
00934     /* Epoch marker fields.  epoch_markers[] holds the marker entries themselves; the ring buffer has one more cell than there are markers. */
00935     int32_t                     epoch_markers_active;
00936     hbool_t                     epoch_marker_active[H5C__MAX_EPOCH_MARKERS];
00937     int32_t                     epoch_marker_ringbuf[H5C__MAX_EPOCH_MARKERS+1];
00938     int32_t                     epoch_marker_ringbuf_first;
00939     int32_t                     epoch_marker_ringbuf_last;
00940     int32_t                     epoch_marker_ringbuf_size;
00941     H5C_cache_entry_t           epoch_markers[H5C__MAX_EPOCH_MARKERS];
00942     /* Cache hit rate fields: always present, regardless of whether statistics collection is enabled. */
00943     int64_t                     cache_hits;
00944     int64_t                     cache_accesses;
00945 
00946 #if H5C_COLLECT_CACHE_STATS
00947     /* Everything down to the matching #endif exists only when H5C_COLLECT_CACHE_STATS is true. */
00948     /* stats fields -- the per-type arrays below are indexed by entry type id */
00949     int64_t                     hits[H5C__MAX_NUM_TYPE_IDS + 1];
00950     int64_t                     misses[H5C__MAX_NUM_TYPE_IDS + 1];
00951     int64_t                     write_protects[H5C__MAX_NUM_TYPE_IDS + 1];
00952     int64_t                     read_protects[H5C__MAX_NUM_TYPE_IDS + 1];
00953     int32_t                     max_read_protects[H5C__MAX_NUM_TYPE_IDS + 1];
00954     int64_t                     insertions[H5C__MAX_NUM_TYPE_IDS + 1];
00955     int64_t                     pinned_insertions[H5C__MAX_NUM_TYPE_IDS + 1];
00956     int64_t                     clears[H5C__MAX_NUM_TYPE_IDS + 1];
00957     int64_t                     flushes[H5C__MAX_NUM_TYPE_IDS + 1];
00958     int64_t                     evictions[H5C__MAX_NUM_TYPE_IDS + 1];
00959     int64_t                     renames[H5C__MAX_NUM_TYPE_IDS + 1];
00960     int64_t                     entry_flush_renames[H5C__MAX_NUM_TYPE_IDS + 1];
00961     int64_t                     cache_flush_renames[H5C__MAX_NUM_TYPE_IDS + 1];
00962     int64_t                     pins[H5C__MAX_NUM_TYPE_IDS + 1];
00963     int64_t                     unpins[H5C__MAX_NUM_TYPE_IDS + 1];
00964     int64_t                     dirty_pins[H5C__MAX_NUM_TYPE_IDS + 1];
00965     int64_t                     pinned_flushes[H5C__MAX_NUM_TYPE_IDS + 1];
00966     int64_t                     pinned_clears[H5C__MAX_NUM_TYPE_IDS + 1];
00967     int64_t                     size_increases[H5C__MAX_NUM_TYPE_IDS + 1];
00968     int64_t                     size_decreases[H5C__MAX_NUM_TYPE_IDS + 1];
00969     int64_t                     entry_flush_size_changes
00970                                         [H5C__MAX_NUM_TYPE_IDS + 1];
00971     int64_t                     cache_flush_size_changes
00972                                         [H5C__MAX_NUM_TYPE_IDS + 1];
00973     /* Hash table activity counters for the current epoch. */
00974     int64_t                     total_ht_insertions;
00975     int64_t                     total_ht_deletions;
00976     int64_t                     successful_ht_searches;
00977     int64_t                     total_successful_ht_search_depth;
00978     int64_t                     failed_ht_searches;
00979     int64_t                     total_failed_ht_search_depth;
00980     /* Largest values attained in the current epoch by the correspondingly named length/size fields above. */
00981     int32_t                     max_index_len;
00982     size_t                      max_index_size;
00983     size_t                      max_clean_index_size;
00984     size_t                      max_dirty_index_size;
00985 
00986     int32_t                     max_slist_len;
00987     size_t                      max_slist_size;
00988 
00989     int32_t                     max_pl_len;
00990     size_t                      max_pl_size;
00991 
00992     int32_t                     max_pel_len;
00993     size_t                      max_pel_size;
00994     /* Counters for H5C_make_space_in_cache() ("msic"). */
00995     int64_t                     calls_to_msic;
00996     int64_t                     total_entries_skipped_in_msic;
00997     int64_t                     total_entries_scanned_in_msic;
00998     int32_t                     max_entries_skipped_in_msic;
00999     int32_t                     max_entries_scanned_in_msic;
01000     int64_t                     entries_scanned_to_make_space;
01001 
01002 #if H5C_COLLECT_CACHE_ENTRY_STATS
01003     /* Per-entry extremes by type id; present only when H5C_COLLECT_CACHE_ENTRY_STATS is also true. */
01004     int32_t                     max_accesses[H5C__MAX_NUM_TYPE_IDS + 1];
01005     int32_t                     min_accesses[H5C__MAX_NUM_TYPE_IDS + 1];
01006     int32_t                     max_clears[H5C__MAX_NUM_TYPE_IDS + 1];
01007     int32_t                     max_flushes[H5C__MAX_NUM_TYPE_IDS + 1];
01008     size_t                      max_size[H5C__MAX_NUM_TYPE_IDS + 1];
01009     int32_t                     max_pins[H5C__MAX_NUM_TYPE_IDS + 1];
01010 
01011 #endif /* H5C_COLLECT_CACHE_ENTRY_STATS */
01012 
01013 #endif /* H5C_COLLECT_CACHE_STATS */
01014     /* Test support: flags that disable sanity checks, and the prefix used to mark debugging output. */
01015     hbool_t                     skip_file_checks;
01016     hbool_t                     skip_dxpl_id_checks;
01017     char                        prefix[H5C__PREFIX_LEN];
01018 };
01019 
01020 #endif /* _H5Cpkg_H */
01021