35 #define H5S_MAX_RANK 32 36 #define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1) 37 #include <H5Ppublic.h> 38 #include <H5Dpublic.h> 39 #include <H5Epublic.h> 40 #include <H5Zpublic.h> 41 #include <H5Spublic.h> 54 typedef struct H5D_chunk_rec_t {
55 hsize_t scaled[H5O_LAYOUT_NDIMS];
62 #include <D4Attributes.h> 64 #include <D4ParserSax2.h> 67 #include <TheBESKeys.h> 70 #include <BESNotFoundError.h> 71 #include <BESInternalError.h> 73 #include "DmrppTypeFactory.h" 74 #include "DmrppD4Group.h" 75 #include "DmrppMetadataStore.h" 79 using namespace dmrpp;
81 static bool verbose =
false;
// NOTE(review): this listing carries the original file's line numbers inline and
// skips some of them, so several conditions, declarations and braces that guard
// the statements below are not visible here.
//
// VERBOSE(x) executes statement x only when the file-level `verbose` flag is true.
// DEBUG_KEY and ROOT_DIRECTORY are string constants defined for use elsewhere in the file.
//
// print_dataset_type_info: writes diagnostic text about `dataset` to cerr.
//   dataset     - HDF5 dataset id; its datatype and creation property list are queried.
//   layout_type - storage layout code; the messages below treat 1 as contiguous,
//                 2 as chunking and 3 as compact.
// Throws BESInternalError (presumably when the corresponding HDF5 call fails; the
// guarding conditions for some throws are not shown in this listing).
82 #define VERBOSE(x) do { if (verbose) x; } while(false) 84 #define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp" 85 #define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory" 97 static void print_dataset_type_info(hid_t dataset, uint8_t layout_type)
99 hid_t dtype_id = H5Dget_type(dataset);
101 throw BESInternalError(
"Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
// Fill-value reporting is only attempted for integer and floating-point datatypes.
104 if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
105 hid_t dcpl_id = H5Dget_create_plist(dataset);
107 throw BESInternalError(
"Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
// Ask the creation property list whether a fill value is defined at all.
113 H5D_fill_value_t fvalue_status;
114 if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
116 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Undefined fill value: report the storage layout, then say the fill value is undefined.
118 if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
120 if (layout_type == 1)
121 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
122 else if (layout_type == 2)
123 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
124 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
126 cerr <<
" The Fillvalue is undefined ." << endl;
// Same layout report as above, emitted on the path that goes on to read the fill value.
129 if (layout_type == 1)
130 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
131 else if (layout_type == 2)
132 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
133 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
// Allocate a buffer whose size matches the datatype size (1, 2, 4 or 8 bytes).
// The declaration of fvalue is not shown here; it is assigned from (char*) casts,
// so it is presumably a char*.
136 size_t fv_size = H5Tget_size(dtype_id);
138 fvalue = (
char*) (malloc(1));
139 else if (fv_size == 2)
140 fvalue = (
char*) (malloc(2));
141 else if (fv_size == 4)
142 fvalue = (
char*) (malloc(4));
143 else if (fv_size == 8) fvalue = (
char*) (malloc(8));
// Read the fill value, converted to the dataset's own datatype, into the buffer.
146 if (H5Pget_fill_value(dcpl_id, dtype_id, (
void*) (fvalue)) < 0) {
// NOTE(review): this message repeats the "fill value status" text used above even
// though the failing call here is H5Pget_fill_value — confirm the wording is intended.
148 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Integer types: the message names the C type chosen from the size and signedness.
// NOTE(review): every branch below streams `*fvalue`; if fvalue is a char*, that
// prints only the first byte as a character regardless of fv_size — TODO confirm.
150 if (H5Tget_class(dtype_id) == H5T_INTEGER) {
151 H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
153 if (fv_sign == H5T_SGN_NONE) {
154 cerr <<
"This dataset's datatype is unsigned char " << endl;
155 cerr <<
"and the fillvalue is " << *fvalue << endl;
158 cerr <<
"This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
161 else if (fv_size == 2) {
162 if (fv_sign == H5T_SGN_NONE) {
163 cerr <<
"This dataset's datatype is unsigned short and the fillvalue is " << *fvalue << endl;
166 cerr <<
"This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
169 else if (fv_size == 4) {
170 if (fv_sign == H5T_SGN_NONE) {
171 cerr <<
"This dataset's datatype is unsigned int and the fillvalue is " << *fvalue << endl;
174 cerr <<
"This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
177 else if (fv_size == 8) {
178 if (fv_sign == H5T_SGN_NONE) {
179 cerr <<
"This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue << endl;
182 cerr <<
"This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
// Floating-point types: the visible branches report "float" and, for 8 bytes, "double".
186 if (H5Tget_class(dtype_id) == H5T_FLOAT) {
188 cerr <<
"This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
190 else if (fv_size == 8) {
191 cerr <<
"This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
// Release the malloc'ed fill-value buffer.
195 if (fvalue != NULL) free(fvalue);
// Message for datatypes wider than 8 bytes (its guarding condition is not shown in this listing).
199 <<
"The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset." 210 if (layout_type == 1)
211 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
212 else if (layout_type == 2)
213 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
214 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
// Message for datatypes that are neither integer nor float.
216 cerr <<
"The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset." << endl;
// set_filter_information: walks the filters in the creation property list of
// `dataset_id` and switches on each filter's type.
//   dataset_id - HDF5 dataset id whose creation property list is queried.
//   dc         - DmrppCommon object for the variable; presumably receives the
//                deflate/shuffle settings (those statements are not shown in
//                this listing — TODO confirm).
// The visible cases are H5Z_FILTER_DEFLATE and H5Z_FILTER_SHUFFLE; the remaining
// path builds an "Unsupported HDF5 filter: " message.
237 static void set_filter_information(hid_t dataset_id,
DmrppCommon *dc)
239 hid_t plist_id = H5Dget_create_plist(dataset_id);
242 int numfilt = H5Pget_nfilters(plist_id);
243 VERBOSE(cerr <<
"Number of filters associated with dataset: " << numfilt << endl);
// Visit each filter by index.
245 for (
int filter = 0; filter < numfilt; filter++) {
// The client-data array and filter name are not requested (NULL, 0, NULL are passed).
// The declaration of nelmts is not shown in this listing.
247 unsigned int flags, filter_info;
248 H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
249 VERBOSE(cerr <<
"Filter Type: ");
251 switch (filter_type) {
252 case H5Z_FILTER_DEFLATE:
253 VERBOSE(cerr <<
"H5Z_FILTER_DEFLATE" << endl);
256 case H5Z_FILTER_SHUFFLE:
257 VERBOSE(cerr <<
"H5Z_FILTER_SHUFFLE" << endl);
// std::ios::ate positions the stream at the end of the initial text so later
// inserts append to the message instead of overwriting it.
261 ostringstream oss(
"Unsupported HDF5 filter: ", std::ios::ate);
// get_variable_chunk_info: reads the storage layout of `dataset` and records its
// byte ranges on `dc` via add_chunk().
//   dataset - HDF5 dataset id to inspect.
//   dc      - DmrppCommon object for the variable; the add_chunk() calls are
//             skipped when it is null.
// Contiguous storage records one chunk (offset and storage size); chunked storage
// records one chunk per HDF5 chunk with its coordinates; compact storage only
// queries the storage size in the lines visible here. Any other layout value
// builds an "Unsupported HDF5 dataset layout type: " message.
// Throws BESInternalError / BESNotFoundError as shown below (some guarding
// conditions are not shown in this listing).
286 static void get_variable_chunk_info(hid_t dataset,
DmrppCommon *dc)
289 hid_t dcpl = H5Dget_create_plist(dataset);
// NOTE(review): the layout enum is narrowed into a uint8_t; a negative error value
// from H5Pget_layout could not be represented in it — TODO confirm error handling.
290 uint8_t layout_type = H5Pget_layout(dcpl);
292 hid_t fspace_id = H5Dget_space(dataset);
// NOTE(review): the rank is stored as unsigned; presumably a failure return from
// H5Sget_simple_extent_ndims is checked elsewhere — verify.
294 unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
297 switch (layout_type) {
// Contiguous: a single byte range described by the dataset offset and storage size.
299 case H5D_CONTIGUOUS: {
300 haddr_t cont_addr = 0;
301 hsize_t cont_size = 0;
302 VERBOSE(cerr <<
"Storage: contiguous" << endl);
304 cont_addr = H5Dget_offset(dataset);
308 cont_size = H5Dget_storage_size(dataset);
310 throw BESInternalError(
"Cannot obtain the storage size.", __FILE__, __LINE__);
312 VERBOSE(cerr <<
" Addr: " << cont_addr << endl);
313 VERBOSE(cerr <<
" Size: " << cont_size << endl);
// Record the single chunk with an empty data URL and empty position string.
315 if (dc) dc->
add_chunk(
"", cont_size, cont_addr,
"" );
// Chunked: count the chunks, record filter information, then visit each chunk.
321 hsize_t num_chunks = 0;
322 herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
328 VERBOSE(cerr <<
"storage: chunked." << endl);
329 VERBOSE(cerr <<
"Number of chunks is " << num_chunks << endl);
332 set_filter_information(dataset, dc);
// NOTE(review): the vector<size_t> buffer is passed to H5Pget_chunk through an
// (hsize_t*) cast; this assumes size_t and hsize_t have the same width — TODO confirm.
335 vector<size_t> chunk_dims(dataset_rank);
336 unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t*) &chunk_dims[0]);
337 if (chunk_rank != dataset_rank)
338 throw BESNotFoundError(
"Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__, __LINE__);
// NOTE(review): the loop index is unsigned int while num_chunks is hsize_t.
342 for (
unsigned int i = 0; i < num_chunks; ++i) {
// temp_coords receives the chunk coordinates from HDF5 as hsize_t; chunk_coords is
// the unsigned int copy handed to add_chunk().
344 vector<hsize_t> temp_coords(dataset_rank);
345 vector<unsigned int> chunk_coords(dataset_rank);
// The filter mask is not requested (NULL); addr and size are declared on lines
// not shown in this listing.
351 status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
353 VERBOSE(cerr <<
"ERROR" << endl);
354 throw BESInternalError(
"Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
357 VERBOSE(cerr <<
"chk_idk: " << i <<
", addr: " << addr <<
", size: " << size << endl);
// Copy the coordinates element by element, narrowing hsize_t to unsigned int.
361 for (
unsigned int j = 0; j < chunk_coords.size(); ++j) {
362 chunk_coords[j] = temp_coords[j];
368 if (dc) dc->
add_chunk(
"", size, addr, chunk_coords);
// Compact: only the storage size is queried in the lines visible here.
376 VERBOSE(cerr <<
"Storage: compact" << endl);
377 size_t comp_size = 0;
381 comp_size = H5Dget_storage_size(dataset);
383 throw BESInternalError(
"Cannot obtain the compact storage size.", __FILE__, __LINE__);
385 VERBOSE(cerr <<
" Size: " << comp_size << endl);
// Any other layout value: build an error message.
// NOTE(review): layout_type is a uint8_t, which operator<< typically inserts as a
// character rather than a number — confirm the message reads as intended.
391 ostringstream oss(
"Unsupported HDF5 dataset layout type: ", std::ios::ate);
392 oss << layout_type <<
".";
// get_chunks_for_all_variables: for each variable in `group`, opens the matching
// HDF5 dataset in `file` and passes it to get_variable_chunk_info(), then recurses
// into the group's child groups.
//   file  - open HDF5 file id used for H5Dopen2().
//   group - D4Group whose variables and child groups are visited.
// Throws BESInternalError when a variable has no attribute table or when a dataset
// named by a "fullnamepath" attribute cannot be opened.
412 static void get_chunks_for_all_variables(hid_t file, D4Group *group)
415 for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
418 D4Attributes *d4_attrs = (*v)->attributes();
420 throw BESInternalError(
"Expected to find an attribute table for " + (*v)->name() +
" but did not.", __FILE__, __LINE__);
// Use the "fullnamepath" attribute as the dataset path when it holds exactly one
// value; the alternative path is on lines not shown in this listing.
425 D4Attribute *attr = d4_attrs->get(
"fullnamepath");
427 if (attr && attr->num_values() == 1)
428 FQN = attr->value(0);
432 VERBOSE(cerr <<
"Working on: " << FQN << endl);
433 hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// A failed open is only an error when the path came from a "fullnamepath"
// attribute; the action taken when attr is null is not shown in this listing.
437 if (dataset < 0 && attr == 0)
439 else if (dataset < 0)
440 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
// dynamic_cast yields null when the variable is not a DmrppCommon; the callee
// guards its add_chunk() calls with `if (dc)`.
// NOTE(review): no H5Dclose(dataset) is visible in this listing — verify the
// handle is released.
442 get_variable_chunk_info(dataset, dynamic_cast<DmrppCommon*>(*v));
// Recurse into each child group (the loop header is not shown in this listing).
446 D4Group::groupsIter g = group->grp_begin();
447 D4Group::groupsIter ge = group->grp_end();
449 get_chunks_for_all_variables(file, *g++);
// main: command-line entry point for build_dmrpp.
// Parses options with the GetOpt string "c:f:r:u:dhv", requires an HDF5 file name,
// then follows one of two paths:
//   * a DMR file name was given: parse that DMR, add chunk information from the
//     HDF5 file, and print the DMR++ document to cout;
//   * otherwise: use the BES configuration and the Metadata Store (MDS) to obtain
//     the DMR, add chunk information, store the DMR++ response and print it.
// Errors are reported on cerr; the exit codes are on lines not shown in this listing.
452 int main(
int argc,
char*argv[])
454 string h5_file_name =
"";
455 string h5_dset_path =
"";
456 string dmr_name =
"";
457 string url_name =
"";
460 GetOpt getopt(argc, argv,
"c:f:r:u:dhv");
// The case labels are not shown in this listing; per the usage text these
// assignments presumably belong to -f, -r and -u respectively.
462 while ((option_char = getopt()) != -1) {
463 switch (option_char) {
473 h5_file_name = getopt.optarg;
476 dmr_name = getopt.optarg;
479 url_name = getopt.optarg;
485 cerr <<
"build_dmrpp [-v] -c <bes.conf> -f <data file> [-u <href url>] | build_dmrpp -f <data file> -r <dmr file> | build_dmrpp -h" << endl;
// The HDF5 file name is mandatory for both paths.
492 if (h5_file_name.empty()) {
493 cerr <<
"HDF5 file name must be given (-f <input>)." << endl;
// Path 1: a DMR file was named on the command line.
505 if (!dmr_name.empty()) {
// NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17.
507 auto_ptr<DMRpp> dmrpp(
new DMRpp);
509 dmrpp->set_factory(&dtf);
// Parse the DMR document from the named file into the DMRpp object.
511 ifstream in(dmr_name.c_str());
513 parser.intern(in, dmrpp.get(),
false);
516 file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
518 cerr <<
"Error: HDF5 file '" + h5_file_name +
"' cannot be opened." << endl;
// Add chunk information to every variable, starting from the root group.
523 get_chunks_for_all_variables(file, dmrpp->root());
526 dmrpp->print_dmrpp(writer, url_name);
528 cout << writer.get_doc();
// Path 2: no DMR file given; work from the BES configuration and the MDS.
532 string bes_data_root;
536 cerr <<
"Error: Could not find the BES root directory key." << endl;
548 cerr <<
"The Metadata Store (MDS) must be configured for this command to work." << endl;
// Fetch the DMR for the file from the MDS; the dynamic_cast yields null when the
// stored object is not a DMRpp.
561 auto_ptr<DMRpp> dmrpp(dynamic_cast<DMRpp*>(mds->
get_dmr_object(h5_file_name )));
563 cerr <<
"Expected a DMR++ object from the DmrppMetadataStore." << endl;
// h5_file_path is built on lines not shown in this listing.
568 file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
570 cerr <<
"Error: HDF5 file '" + h5_file_path +
"' cannot be opened." << endl;
574 get_chunks_for_all_variables(file, dmrpp->root());
576 dmrpp->set_href(url_name);
// Store the DMR++ response in the MDS under the HDF5 file name.
578 mds->add_dmrpp_response(dmrpp.get(), h5_file_name );
// Enable chunk output and print the document to cout.
581 dmrpp->set_print_chunks(
true);
582 dmrpp->print_dap4(writer);
584 cout << writer.get_doc();
587 cerr <<
"Error: Could not get a lock on the DMR for '" + h5_file_path +
"'." << endl;
// Exception handlers: report the problem on cerr.
596 catch (std::exception &e) {
597 cerr <<
"std::exception: " << e.what() << endl;
601 cerr <<
"Unknown error." << endl;
error thrown if the resource requested cannot be found
exception thrown if an internal error is encountered
Provide a way to print the DMR++ response.
void set_shuffle(bool value)
Set the value of the shuffle property.
virtual std::string get_message()
get the error message for this exception
void set_deflate(bool value)
Set the value of the deflate property.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Abstract exception class for the BES with basic string message.
static TheBESKeys * TheKeys()
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
virtual unsigned long add_chunk(const std::string &data_url, unsigned long long size, unsigned long long offset, std::string position_in_array="")
Add a new chunk as defined by an h4:byteStream element.
Size and offset information of data included in DMR++ files.
static string assemblePath(const string &firstPart, const string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
static std::string ConfigFile