00001 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 00002 * Copyright by The HDF Group. * 00003 * Copyright by the Board of Trustees of the University of Illinois. * 00004 * All rights reserved. * 00005 * * 00006 * This file is part of HDF5. The full HDF5 copyright notice, including * 00007 * terms governing use, modification, and redistribution, is contained in * 00008 * the files COPYING and Copyright.html. COPYING can be found at the root * 00009 * of the source code distribution tree; Copyright.html can be found at the * 00010 * root level of an installed copy of the electronic HDF5 document set and * 00011 * is linked from the top-level documents page. It can also be found at * 00012 * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * 00013 * access to either file, you may request a copy from help@hdfgroup.org. * 00014 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 00015 00016 /* 00017 * Programmer: John Mainzer -- 4/19/06 00018 * 00019 * Purpose: This file contains declarations which are normally visible 00020 * only within the H5AC package (just H5AC.c at present). 00021 * 00022 * Source files outside the H5AC package should include 00023 * H5ACprivate.h instead. 00024 * 00025 * The one exception to this rule is testpar/t_cache.c. The 00026 * test code is easier to write if it can look at H5AC_aux_t. 00027 * Indeed, this is the main reason why this file was created. 00028 */ 00029 00030 #ifndef H5AC_PACKAGE 00031 #error "Do not include this file outside the H5AC package!" 
00032 #endif 00033 00034 #ifndef _H5ACpkg_H 00035 #define _H5ACpkg_H 00036 00037 /* Get package's private header */ 00038 #include "H5ACprivate.h" 00039 00040 00041 /* Get needed headers */ 00042 #include "H5Cprivate.h" /* Cache */ 00043 #include "H5SLprivate.h" /* Skip lists */ 00044 00045 00046 #define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 00047 00048 /*------------------------------------------------------------------------- 00049 * It is a bit difficult to set ranges of allowable values on the 00050 * dirty_bytes_threshold field of H5AC_aux_t. The following are 00051 * probably broader than they should be. 00052 *------------------------------------------------------------------------- 00053 */ 00054 00055 #define H5AC__MIN_DIRTY_BYTES_THRESHOLD (int32_t) \ 00056 (H5C__MIN_MAX_CACHE_SIZE / 2) 00057 #define H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD (256 * 1024) 00058 #define H5AC__MAX_DIRTY_BYTES_THRESHOLD (int32_t) \ 00059 (H5C__MAX_MAX_CACHE_SIZE / 4) 00060 00061 /**************************************************************************** 00062 * 00063 * structure H5AC_aux_t 00064 * 00065 * While H5AC has become a wrapper for the cache implemented in H5C.c, there 00066 * are some features of the metadata cache that are specific to it, and which 00067 * therefore do not belong in the more generic H5C cache code. 00068 * 00069 * In particular, there is the matter of synchronizing writes from the 00070 * metadata cache to disk in the PHDF5 case. 00071 * 00072 * Prior to this update, the presumption was that all metadata caches would 00073 * write the same data at the same time since all operations modifying 00074 * metadata must be performed collectively. Given this assumption, it was 00075 * safe to allow only the writes from process 0 to actually make it to disk, 00076 * while metadata writes from all other processes were discarded. 
00077 * 00078 * Unfortunately, this presumption is in error as operations that read 00079 * metadata need not be collective, but can change the location of dirty 00080 * entries in the metadata cache LRU lists. This can result in the same 00081 * metadata write operation triggering writes from the metadata caches on 00082 * some processes, but not all (causing a hang), or in different sets of 00083 * entries being written from different caches (potentially resulting in 00084 * metadata corruption in the file). 00085 * 00086 * To deal with this issue, I decided to apply a paradigm shift to the way 00087 * metadata is written to disk. 00088 * 00089 * With this set of changes, only the metadata cache on process 0 is able 00090 * to write metadata to disk, although metadata caches on all other 00091 * processes can read metadata from disk as before. 00092 * 00093 * To keep all the other caches from getting plugged up with dirty metadata, 00094 * process 0 periodically broadcasts a list of entries that it has flushed 00095 * since the last notice, and which are currently clean. The other caches 00096 * mark these entries as clean as well, which allows them to evict the 00097 * entries as needed. 00098 * 00099 * One obvious problem in this approach is synchronizing the broadcasts 00100 * and receptions, as different caches may see different amounts of 00101 * activity. 00102 * 00103 * The current solution is for the caches to track the number of bytes 00104 * of newly generated dirty metadata, and to broadcast and receive 00105 * whenever this value exceeds some user specified threshold. 00106 * 00107 * Maintaining this count is easy for all processes not on process 0 -- 00108 * all that is necessary is to add the size of the entry to the total 00109 * whenever there is an insertion, a rename of a previously clean entry, 00110 * or whenever a previously clean entry is marked dirty in an unprotect.
00111 * 00112 * On process 0, we have to be careful not to count dirty bytes twice. 00113 * If an entry is marked dirty, flushed, and marked dirty again, all 00114 * within a single reporting period, only the first marking should 00115 * be added to the dirty bytes generated tally, as that is all that 00116 * the other processes will see. 00117 * 00118 * At present, this structure exists to maintain the fields needed to 00119 * implement the above scheme, and thus is only used in the parallel 00120 * case. However, other uses may arise in the future. 00121 * 00122 * Instances of this structure are associated with metadata caches via 00123 * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is 00124 * responsible for allocating, maintaining, and discarding instances 00125 * of H5AC_aux_t. 00126 * 00127 * The remainder of this header comment documents the individual fields 00128 * of the structure. 00129 * 00130 * JRM - 6/27/05 00131 * 00132 * magic: Unsigned 32 bit integer always set to 00133 * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate 00134 * pointers to instances of H5AC_aux_t. 00135 * 00136 * mpi_comm: MPI communicator associated with the file for which the 00137 * cache has been created. 00138 * 00139 * mpi_rank: MPI rank of this process within mpi_comm. 00140 * 00141 * mpi_size: Number of processes in mpi_comm. 00142 * 00143 * write_permitted: Boolean flag used to control whether the cache 00144 * is permitted to write to file. 00145 * 00146 * dirty_bytes_threshold: Integer field containing the dirty bytes 00147 * generation threshold.
Whenever dirty byte creation 00148 * exceeds this value, the metadata cache on process 0 00149 * broadcasts a list of the entries it has flushed since 00150 * the last broadcast (or since the beginning of execution) 00151 * and which are currently clean (if they are still in the 00152 * cache) 00153 * 00154 * Similarly, metadata caches on processes other than process 00155 * 0 will attempt to receive a list of clean entries whenever 00156 * the threshold is exceeded. 00157 * 00158 * dirty_bytes: Integer field containing the number of bytes of dirty 00159 * metadata generated since the beginning of the computation, 00160 * or (more typically) since the last clean entries list 00161 * broadcast. This field is reset to zero after each such 00162 * broadcast. 00163 * 00164 * dirty_bytes_propagations: This field only exists when the 00165 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00166 * 00167 * It is used to track the number of times the cleaned list 00168 * has been propagated from process 0 to the other 00169 * processes. 00170 * 00171 * unprotect_dirty_bytes: This field only exists when the 00172 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00173 * 00174 * It is used to track the number of dirty bytes created 00175 * via unprotect operations since the last time the cleaned 00176 * list was propagated. 00177 * 00178 * unprotect_dirty_bytes_updates: This field only exists when the 00179 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00180 * 00181 * It is used to track the number of times dirty bytes have 00182 * been created via unprotect operations since the last time 00183 * the cleaned list was propagated. 00184 * 00185 * insert_dirty_bytes: This field only exists when the 00186 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00187 * 00188 * It is used to track the number of dirty bytes created 00189 * via insert operations since the last time the cleaned 00190 * list was propagated. 
00191 * 00192 * insert_dirty_bytes_updates: This field only exists when the 00193 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00194 * 00195 * It is used to track the number of times dirty bytes have 00196 * been created via insert operations since the last time 00197 * the cleaned list was propagated. 00198 * 00199 * rename_dirty_bytes: This field only exists when the 00200 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00201 * 00202 * It is used to track the number of dirty bytes created 00203 * via rename operations since the last time the cleaned 00204 * list was propagated. 00205 * 00206 * rename_dirty_bytes_updates: This field only exists when the 00207 * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. 00208 * 00209 * It is used to track the number of times dirty bytes have 00210 * been created via rename operations since the last time 00211 * the cleaned list was propagated. 00212 * 00213 * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list 00214 * of entries that have been dirtied since the last time they 00215 * were listed in a clean entries broadcast. This list is 00216 * only maintained by the metadata cache on process 0 -- it 00217 * is used to maintain a view of the dirty entries as seen 00218 * by the other caches, so as to keep the dirty bytes count 00219 * in synchronization with them. 00220 * 00221 * Thus on process 0, the dirty_bytes count is incremented 00222 * only if either 00223 * 00224 * 1) an entry is inserted in the metadata cache, or 00225 * 00226 * 2) a previously clean entry is renamed, and it does not 00227 * already appear in the dirty entry list, or 00228 * 00229 * 3) a previously clean entry is unprotected with the 00230 * dirtied flag set and the entry does not already appear 00231 * in the dirty entry list. 00232 * 00233 * Entries are added to the dirty entry list whenever they cause 00234 * the dirty bytes count to be increased. They are removed 00235 * when they appear in a clean entries broadcast.
Note that 00236 * renames must be reflected in the dirty entry list. 00237 * 00238 * To reiterate, this field is only used on process 0 -- it 00239 * should be NULL on all other processes. 00240 * 00241 * d_slist_len: Integer field containing the number of entries in the 00242 * dirty entry list. This field should always contain the 00243 * value 0 on all processes other than process 0. It exists 00244 * primarily for sanity checking. 00245 * 00246 * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list 00247 * of entries that were dirty, have been flushed 00248 * to disk since the last clean entries broadcast, and are 00249 * still clean. Since only process 0 can write to disk, this 00250 * list only exists on process 0. 00251 * 00252 * In essence, this slist is used to assemble the contents of 00253 * the next clean entries broadcast. The list is emptied after 00254 * each broadcast. 00255 * 00256 * c_slist_len: Integer field containing the number of entries in the clean 00257 * entries list (*c_slist_ptr). This field should always 00258 * contain the value 0 on all processes other than process 0. 00259 * It exists primarily for sanity checking. 00260 * 00261 * write_done: In the parallel test bed, it is necessary to ensure that 00262 * all writes to the server process from cache 0 complete 00263 * before it enters the barrier call with the other caches. 00264 * 00265 * The write_done callback allows t_cache to do this without 00266 * requiring an ACK on each write. Since these ACKs greatly 00267 * increase the run time on some platforms, this is a 00268 * significant optimization. 00269 * 00270 * This field must be set to NULL when the callback is not 00271 * needed.
00272 * 00273 ****************************************************************************/ 00274 00275 #ifdef H5_HAVE_PARALLEL 00276 00277 #define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01 00278 00279 typedef struct H5AC_aux_t 00280 { 00281 uint32_t magic; 00282 00283 MPI_Comm mpi_comm; 00284 00285 int mpi_rank; 00286 00287 int mpi_size; 00288 00289 hbool_t write_permitted; 00290 00291 int32_t dirty_bytes_threshold; 00292 00293 int32_t dirty_bytes; 00294 00295 #if H5AC_DEBUG_DIRTY_BYTES_CREATION 00296 00297 int32_t dirty_bytes_propagations; 00298 00299 int32_t unprotect_dirty_bytes; 00300 int32_t unprotect_dirty_bytes_updates; 00301 00302 int32_t insert_dirty_bytes; 00303 int32_t insert_dirty_bytes_updates; 00304 00305 int32_t rename_dirty_bytes; 00306 int32_t rename_dirty_bytes_updates; 00307 00308 #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ 00309 00310 H5SL_t * d_slist_ptr; 00311 00312 int32_t d_slist_len; 00313 00314 H5SL_t * c_slist_ptr; 00315 00316 int32_t c_slist_len; 00317 00318 void (* write_done)(void); 00319 00320 } H5AC_aux_t; /* struct H5AC_aux_t */ 00321 00322 #endif /* H5_HAVE_PARALLEL */ 00323 00324 #endif /* _H5ACpkg_H */ 00325