bes  Updated for version 3.20.6
ncdds.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of nc_handler, a data handler for the OPeNDAP data
5 // server.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This is free software; you can redistribute it and/or modify it under the
11 // terms of the GNU Lesser General Public License as published by the Free
12 // Software Foundation; either version 2.1 of the License, or (at your
13 // option) any later version.
14 //
15 // This software is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
18 // License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 // (c) COPYRIGHT URI/MIT 1994-1996
28 // Please read the full copyright statement in the file COPYRIGHT.
29 //
30 // Authors:
31 // reza Reza Nekovei (reza@intcomm.net)
32 
33 // This file contains functions which read the variables and their description
34 // from a netcdf file and build the in-memory DDS. These functions form the
35 // core of the server-side software necessary to extract the DDS from a
36 // netcdf data file.
37 //
38 // It also contains test code which will print the in-memory DDS to
39 // stdout.
40 //
41 // ReZa 10/20/94
42 
43 #include "config_nc.h"
44 
45 #include <cstdio>
46 #include <cstring>
47 #include <iostream>
48 #include <string>
49 #include <algorithm>
50 
51 #include <netcdf.h>
52 
53 #include <DDS.h>
54 #include <mime_util.h>
55 #include <util.h>
56 
57 #include "NCRequestHandler.h"
58 #include "nc_util.h"
59 
60 #include "NCInt32.h"
61 #include "NCUInt32.h"
62 #include "NCInt16.h"
63 #include "NCUInt16.h"
64 #include "NCFloat64.h"
65 #include "NCFloat32.h"
66 #include "NCByte.h"
67 #include "NCArray.h"
68 #include "NCGrid.h"
69 #include "NCStr.h"
70 
71 #include "NCStructure.h"
72 
73 using namespace libdap ;
74 
77 static BaseType *
78 build_scalar(const string &varname, const string &dataset, nc_type datatype)
79 {
80  switch (datatype) {
81 #if NETCDF_VERSION >= 4
82  case NC_STRING:
83 #endif
84  case NC_CHAR:
85  return (new NCStr(varname, dataset));
86 
87  case NC_BYTE:
88  if (NCRequestHandler::get_promote_byte_to_short()) {
89  return (new NCInt16(varname, dataset));
90  }
91  else {
92  return (new NCByte(varname, dataset));
93  }
94 
95  case NC_SHORT:
96  return (new NCInt16(varname, dataset));
97 
98  case NC_INT:
99  return (new NCInt32(varname, dataset));
100 
101 #if NETCDF_VERSION >= 4
102  case NC_UBYTE:
103  // NB: the dods_byte type is unsigned
104  return (new NCByte(varname, dataset));
105 
106  case NC_USHORT:
107  return (new NCUInt16(varname, dataset));
108 
109  case NC_UINT:
110  return (new NCUInt32(varname, dataset));
111 #endif
112  case NC_FLOAT:
113  return (new NCFloat32(varname, dataset));
114 
115  case NC_DOUBLE:
116  return (new NCFloat64(varname, dataset));
117 
118 #if NETCDF_VERSION >= 4
119  case NC_INT64:
120  case NC_UINT64:
121  if (NCRequestHandler::get_ignore_unknown_types())
122  cerr << "The netCDF handler does not currently support 64 bit integers.";
123  else
124  throw Error("The netCDF handler does not currently support 64 bit integers.");
125  break;
126 #endif
127 
128  default:
129  throw InternalErr(__FILE__, __LINE__, "Unknown type (" + long_to_string(datatype) + ") for variable '" + varname + "'");
130  }
131 
132  return 0;
133 }
134 
135 #if 0
136 // Replaced by code in nc_util.cc. jhrg 2/9/12
137 
138 static bool is_user_defined(nc_type type)
139 {
140 #if NETCDF_VERSION >= 4
141  return type >= NC_FIRSTUSERTYPEID;
142 #else
143  return false;
144 #endif
145 }
146 #endif
147 
154 static Grid *build_grid(Array *ar, int ndims, const nc_type array_type,
155  const char map_names[MAX_NC_VARS][MAX_NC_NAME],
156  const nc_type map_types[MAX_NC_VARS],
157  const size_t map_sizes[MAX_VAR_DIMS],
158  vector<string> *all_maps)
159 {
160  // Grids of NC_CHARs are treated as Grids of strings; the outermost
161  // dimension (the char vector) becomes the string.
162  if (array_type == NC_CHAR)
163  --ndims;
164 
165  for (int d = 0; d < ndims; ++d) {
166  ar->append_dim(map_sizes[d], map_names[d]);
167  // Save the map names for latter use, which might not happen...
168  all_maps->push_back(string(map_names[d]));
169  }
170 
171  const string &filename = ar->dataset();
172  Grid *gr = new NCGrid(ar->name(), filename);
173  gr->add_var(ar, libdap::array);
174 
175  // Build and add BaseType/Array instances for the maps
176  for (int d = 0; d < ndims; ++d) {
177  BaseType *local_bt = build_scalar(map_names[d], filename, map_types[d]);
178  NCArray *local_ar = new NCArray(local_bt->name(), filename, local_bt);
179  delete local_bt;
180  local_ar->append_dim(map_sizes[d], map_names[d]);
181  gr->add_var(local_ar, maps);
182  delete local_ar;
183  }
184 
185  return gr;
186 }
187 
188 #if NETCDF_VERSION >= 4
189 
192 static BaseType *build_user_defined(int ncid, int varid, nc_type xtype, const string &dataset,
193  int ndims, int dim_ids[MAX_VAR_DIMS])
194 {
195  size_t size;
196  nc_type base_type;
197  size_t nfields;
198  int class_type;
199  int status = nc_inq_user_type(ncid, xtype, 0/*name*/, &size, &base_type, &nfields, &class_type);
200  if (status != NC_NOERR)
201  throw InternalErr(__FILE__, __LINE__, "Could not get information about a user-defined type (" + long_to_string(status) + ").");
202 
203  switch (class_type) {
204  case NC_COMPOUND: {
205  char var_name[NC_MAX_NAME+1];
206  nc_inq_varname(ncid, varid, var_name);
207 
208  NCStructure *ncs = new NCStructure(var_name, dataset);
209 
210  for (size_t i = 0; i < nfields; ++i) {
211  char field_name[NC_MAX_NAME+1];
212  nc_type field_typeid;
213  int field_ndims;
214  int field_sizes[MAX_NC_DIMS];
215  nc_inq_compound_field(ncid, xtype, i, field_name, 0, &field_typeid, &field_ndims, &field_sizes[0]);
216  BaseType *field;
217  if (is_user_defined_type(ncid, field_typeid)) {
218  //is_user_defined(field_typeid)) {
219  // Odd: 'varid' here seems wrong, but works.
220  field = build_user_defined(ncid, varid, field_typeid, dataset, field_ndims, field_sizes);
221  // Child compound types become anonymous variables but DAP
222  // requires names, so use the type name.
223  char var_name[NC_MAX_NAME+1];
224  nc_inq_compound_name(ncid, field_typeid, var_name);
225  field->set_name(var_name);
226  }
227  else {
228  field = build_scalar(field_name, dataset, field_typeid);
229  }
230  // is this a scalar or an array? Note that an array of CHAR is
231  // a scalar string in netcdf3.
232  if (field_ndims == 0 || (field_ndims == 1 && field_typeid == NC_CHAR)) {
233  ncs->add_var(field);
234  }
235  else {
236  NCArray *ar = new NCArray(field_name, dataset, field);
237  for (int i = 0; i < field_ndims; ++i) {
238  ar->append_dim(field_sizes[i]);
239  }
240  ncs->add_var(ar);
241  }
242  }
243  // Is this an array of a compound (DAP Structure)?
244  if (ndims > 0) {
245  NCArray *ar = new NCArray(var_name, dataset, ncs);
246  for (int i = 0; i < ndims; ++i) {
247  char dimname[NC_MAX_NAME+1];
248  size_t dim_sz;
249  int errstat = nc_inq_dim(ncid, dim_ids[i], dimname, &dim_sz);
250  if (errstat != NC_NOERR) {
251  delete ar;
252  throw InternalErr(__FILE__, __LINE__, string("Failed to read dimension information for the compound variable ") + var_name);
253  }
254 
255  ar->append_dim(dim_sz, dimname);
256  }
257 
258  return ar;
259  }
260  else {
261  return ncs;
262  }
263 
264  break;
265  }
266 
267  case NC_VLEN:
268  if (NCRequestHandler::get_ignore_unknown_types()) {
269  cerr << "in build_user_defined; found a vlen." << endl;
270  return 0;
271  }
272  else
273  throw Error("The netCDF handler does not yet suppor the NC_VLEN type.");
274  break;
275 
276  case NC_OPAQUE: {
277  vector<char> name(NC_MAX_NAME+1);
278  status = nc_inq_varname(ncid, varid, &name[0]);
279  if (status != NC_NOERR)
280  throw InternalErr(__FILE__, __LINE__, "Could not get name of an opaque (" + long_to_string(status) + ").");
281 
282  NCArray *opaque = new NCArray(&name[0], dataset, new NCByte(&name[0], dataset));
283 
284  if (ndims > 0) {
285  for (int i = 0; i < ndims; ++i) {
286  char dimname[NC_MAX_NAME+1];
287  size_t dim_sz;
288  int errstat = nc_inq_dim(ncid, dim_ids[i], dimname, &dim_sz);
289  if (errstat != NC_NOERR) {
290  delete opaque;
291  throw InternalErr(__FILE__, __LINE__, string("Failed to read dimension information for the compound variable ") + &name[0]);
292  }
293  opaque->append_dim(dim_sz, dimname);
294  }
295  }
296  opaque->append_dim(size);
297  return opaque;
298  break;
299  }
300 
301  case NC_ENUM: {
302  nc_type base_nc_type;
303  size_t base_size;
304  status = nc_inq_enum(ncid, xtype, 0 /*&name[0]*/, &base_nc_type, &base_size, 0/*&num_members*/);
305  if (status != NC_NOERR)
306  throw(InternalErr(__FILE__, __LINE__, "Could not get information about an enum(" + long_to_string(status) + ")."));
307 
308  // get the name here - we want the var name and not the type name
309  vector<char> name(MAX_NC_NAME + 1);
310  status = nc_inq_varname(ncid, varid, &name[0]);
311  if (status != NC_NOERR)
312  throw InternalErr(__FILE__, __LINE__, "Could not get name of an opaque (" + long_to_string(status) + ").");
313 
314 
315  BaseType *enum_var = build_scalar(&name[0], dataset, base_nc_type);
316 
317  if (ndims > 0) {
318  NCArray *ar = new NCArray(&name[0], dataset, enum_var);
319 
320  for (int i = 0; i < ndims; ++i) {
321  char dimname[NC_MAX_NAME + 1];
322  size_t dim_sz;
323  int errstat = nc_inq_dim(ncid, dim_ids[i], dimname, &dim_sz);
324  if (errstat != NC_NOERR) {
325  delete ar;
326  throw InternalErr(__FILE__, __LINE__, string("Failed to read dimension information for the compound variable ") + &name[0]);
327  }
328  ar->append_dim(dim_sz, dimname);
329  }
330 
331  return ar;
332  }
333  else {
334  return enum_var;
335  }
336  break;
337  }
338 
339  default:
340  throw InternalErr(__FILE__, __LINE__, "Expected one of NC_COMPOUND, NC_VLEN, NC_OPAQUE or NC_ENUM");
341  }
342 
343  return 0;
344 }
345 #endif
346 
364 static bool find_matching_coordinate_variable(int ncid, int var,
365  char dimname[], size_t dim_sz, nc_type *match_type)
366 {
367  // For netCDF, a Grid's Map must be a netCDF dimension
368  int id;
369  // get the id matching the name.
370  int status = nc_inq_dimid(ncid, dimname, &id);
371  if (status == NC_NOERR) {
372  // get the length, the name was matched above
373  size_t length;
374  status = nc_inq_dimlen(ncid, id, &length);
375  if (status != NC_NOERR) {
376  string msg = "netcdf 3: could not get size for dimension ";
377  msg += long_to_string(id);
378  msg += " in variable ";
379  msg += string(dimname);
380  throw Error(msg);
381  }
382  if (length == dim_sz) {
383  // Both the name and size match and it's a dimension, so we've
384  // found our 'matching coordinate variable'. To get the type,
385  // Must find the variable with the name that matches the dimension.
386  int varid = -1;
387  status = nc_inq_varid(ncid, dimname, &varid);
388  // A variable cannot be its own coordinate variable.
389  // The unlimited dimension does not correspond to a variable,
390  // hence the status error is means the named thing is not a
391  // coordinate; it's not an error as far as the handler is concerned.
392  if (var == varid || status != NC_NOERR)
393  return false;
394 
395  status = nc_inq_vartype(ncid, varid, match_type);
396  if (status != NC_NOERR) {
397  string msg = "netcdf 3: could not get type variable ";
398  msg += string(dimname);
399  throw Error(msg);
400  }
401 
402  return true;
403  }
404  }
405  return false;
406 }
407 
419 static bool is_grid(int ncid, int var, int ndims, const int dim_ids[MAX_VAR_DIMS],
420  size_t map_sizes[MAX_VAR_DIMS],
421  char map_names[MAX_NC_VARS][MAX_NC_NAME],
422  nc_type map_types[MAX_NC_VARS])
423 {
424  // Look at each dimension of the variable.
425  for (int d = 0; d < ndims; ++d) {
426  char dimname[MAX_NC_NAME];
427  size_t dim_sz;
428 
429  int errstat = nc_inq_dim(ncid, dim_ids[d], dimname, &dim_sz);
430  if (errstat != NC_NOERR) {
431  string msg = "netcdf 3: could not get size for dimension ";
432  msg += long_to_string(d);
433  msg += " in variable ";
434  msg += long_to_string(var);
435  throw Error(msg);
436  }
437 
438  nc_type match_type;
439  if (find_matching_coordinate_variable(ncid, var, dimname, dim_sz, &match_type)) {
440  map_types[d] = match_type;
441  map_sizes[d] = dim_sz;
442  strncpy(map_names[d], dimname, MAX_NC_NAME - 1);
443  map_names[d][MAX_NC_NAME - 1] = '\0';
444  }
445  else {
446  return false;
447  }
448  }
449 
450  return true;
451 }
452 
453 static bool is_dimension(const string &name, vector<string> maps)
454 {
455  vector<string>::iterator i = find(maps.begin(), maps.end(), name);
456  if (i != maps.end())
457  return true;
458  else
459  return false;
460 }
461 
462 static NCArray *build_array(BaseType *bt, int ncid, int var,
463  const nc_type array_type, int ndims,
464  const int dim_ids[MAX_NC_DIMS])
465 {
466  NCArray *ar = new NCArray(bt->name(), bt->dataset(), bt);
467 
468  if (array_type == NC_CHAR)
469  --ndims;
470 
471  for (int d = 0; d < ndims; ++d) {
472  char dimname[MAX_NC_NAME];
473  size_t dim_sz;
474  int errstat = nc_inq_dim(ncid, dim_ids[d], dimname, &dim_sz);
475  if (errstat != NC_NOERR) {
476  delete ar;
477  throw Error("netcdf: could not get size for dimension " + long_to_string(d) + " in variable " + long_to_string(var));
478  }
479 
480  ar->append_dim(dim_sz, dimname);
481  }
482 
483  return ar;
484 }
485 
495 static void read_variables(DDS &dds_table, const string &filename, int ncid, int nvars)
496 {
497  // How this function works: The variables are scanned once but because
498  // netCDF includes shared dimensions as variables there are two versions
499  // of this function. One writes out the variables as they are found while
500  // the other writes scalars and Grids as they are found and saves Arrays
501  // for output last. Then, when writing the arrays, it checks to see if
502  // an array variable is also a grid dimension and, if so, does not write
503  // it out (thus in the second version of the function, all arrays appear
504  // after the other variable types and only those arrays that do not
505  // appear as Grid Maps are included.
506 
507  // These two vectors are used to record the ids of array variables and
508  // the names of all of the Grid Map variables
509  vector<int> array_vars;
510  vector<string> all_maps;
511 
512  // These are defined here since they are used by both loops.
513  char name[MAX_NC_NAME];
514  nc_type nctype;
515  int ndims;
516  int dim_ids[MAX_VAR_DIMS];
517 
518  // Examine each variable in the file; if 'elide_grid_maps' is true, adds
519  // only scalars and Grids (Arrays are added in the following loop). If
520  // false, all variables are added in this loop.
521  for (int varid = 0; varid < nvars; ++varid) {
522  int errstat = nc_inq_var(ncid, varid, name, &nctype, &ndims, dim_ids, (int *) 0);
523  if (errstat != NC_NOERR)
524  throw Error("netcdf: could not get name or dimension number for variable " + long_to_string(varid));
525 
526  // These are defined here because they are value-result parameters for
527  // is_grid() called below.
528  size_t map_sizes[MAX_VAR_DIMS];
529  char map_names[MAX_NC_VARS][MAX_NC_NAME];
530  nc_type map_types[MAX_NC_VARS];
531 
532  // a scalar? NB a one-dim NC_CHAR array will have DAP type of
533  // dods_str_c because it's really a scalar string, not an array.
534  if (is_user_defined_type(ncid, nctype)) {
535  // is_user_defined(nctype)) {
536 #if NETCDF_VERSION >= 4
537  BaseType *bt = build_user_defined(ncid, varid, nctype, filename, ndims, dim_ids);
538  dds_table.add_var(bt);
539  delete bt;
540 #endif
541  }
542  else if (ndims == 0 || (ndims == 1 && nctype == NC_CHAR)) {
543  BaseType *bt = build_scalar(name, filename, nctype);
544  dds_table.add_var(bt);
545  delete bt;
546  }
547  else if (is_grid(ncid, varid, ndims, dim_ids, map_sizes, map_names, map_types)) {
548  BaseType *bt = build_scalar(name, filename, nctype);
549  Array *ar = new NCArray(name, filename, bt);
550  delete bt;
551  Grid *gr = build_grid(ar, ndims, nctype, map_names, map_types, map_sizes, &all_maps);
552  delete ar;
553  dds_table.add_var(gr);
554  delete gr;
555  }
556  else {
557  if (!NCRequestHandler::get_show_shared_dims()) {
558  array_vars.push_back(varid);
559  } else {
560  BaseType *bt = build_scalar(name, filename, nctype);
561  NCArray *ar = build_array(bt, ncid, varid, nctype, ndims, dim_ids);
562  delete bt;
563  dds_table.add_var(ar);
564  delete ar;
565  }
566  }
567  }
568 
569  // This code is only run if elide_dimension_arrays is true and in that case the
570  // loop above did not create any simple arrays. Instead it pushed the
571  // var ids of things that look like simple arrays onto a vector. This code
572  // will add all of those that really are arrays and not the ones that are
573  // dimensions used by a Grid.
574  if (!NCRequestHandler::get_show_shared_dims()) {
575  // Now just loop through the saved array variables, writing out only
576  // those that are not Grid Maps
577  nvars = array_vars.size();
578  for (int i = 0; i < nvars; ++i) {
579  int var = array_vars.at(i);
580 
581  int errstat = nc_inq_var(ncid, var, name, &nctype, &ndims,
582  dim_ids, (int *) 0);
583  if (errstat != NC_NOERR) {
584  string msg = "netcdf 3: could not get name or dimension number for variable ";
585  msg += long_to_string(var);
586  throw Error(msg);
587  }
588 
589  // If an array already appears as a Grid Map, don't write it out
590  // as an array too.
591  if (is_dimension(string(name), all_maps))
592  continue;
593 
594  BaseType *bt = build_scalar(name, filename, nctype);
595  Array *ar = build_array(bt, ncid, var, nctype, ndims, dim_ids);
596  delete bt;
597  dds_table.add_var(ar);
598  delete ar;
599  }
600  }
601 }
602 
610 void nc_read_dataset_variables(DDS &dds_table, const string &filename)
611 {
612  ncopts = 0;
613  int ncid, errstat;
614  int nvars;
615 
616  errstat = nc_open(filename.c_str(), NC_NOWRITE, &ncid);
617  if (errstat != NC_NOERR)
618  throw Error(errstat, "Could not open " + filename + ".");
619 
620  // how many variables?
621  errstat = nc_inq_nvars(ncid, &nvars);
622  if (errstat != NC_NOERR)
623  throw Error(errstat, "Could not inquire about netcdf file: " + path_to_filename(filename) + ".");
624 
625  // dataset name
626  dds_table.set_dataset_name(name_path(filename));
627 
628  // read variables' classes
629  read_variables(dds_table, filename, ncid, nvars);
630 
631  if (nc_close(ncid) != NC_NOERR)
632  throw InternalErr(__FILE__, __LINE__, "ncdds: Could not close the dataset!");
633 }
634 
NCUInt32
Definition: NCUInt32.h:46
NCByte
Definition: NCByte.h:46
NCInt16
Definition: NCInt16.h:46
NCUInt16
Definition: NCUInt16.h:46
NCGrid
Definition: NCGrid.h:46
NCFloat64
Definition: NCFloat64.h:46
libdap
Definition: BESDapFunctionResponseCache.h:35
NCStructure
Definition: NCStructure.h:51
NCInt32
Definition: NCInt32.h:46
NCArray
Definition: NCArray.h:51
Error
NCFloat32
Definition: NCFloat32.h:46
NCStr
Definition: NCStr.h:46