bes  Updated for version 3.20.6
BESCatalogUtils.cc
1 // BESCatalogUtils.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <dirent.h>
38 
39 #include <cerrno>
40 #include <iostream>
41 #include <sstream>
42 #include <list>
43 #include <cstring>
44 
45 #include "BESCatalogUtils.h"
46 #include "BESCatalogList.h"
47 #include "TheBESKeys.h"
48 #include "BESInternalError.h"
49 #include "BESSyntaxUserError.h"
50 #include "BESNotFoundError.h"
51 #include "BESRegex.h"
52 #include "BESUtil.h"
53 #include "BESInfo.h"
54 #include "BESContainerStorageList.h"
55 #include "BESContainerStorage.h"
56 #include "BESCatalogEntry.h"
57 
58 using namespace std;
59 
60 #if 0
61 map<string, BESCatalogUtils *> BESCatalogUtils::_instances;
62 #endif
63 
64 
88 BESCatalogUtils::BESCatalogUtils(const string &n, bool strict) :
89  d_name(n), d_follow_syms(false)
90 {
91  string key = "BES.Catalog." + n + ".RootDirectory";
92  bool found = false;
93  TheBESKeys::TheKeys()->get_value(key, d_root_dir, found);
94  if (strict && (!found || d_root_dir == "")) {
95  string s = key + " not defined in BES configuration file";
96  throw BESSyntaxUserError(s, __FILE__, __LINE__);
97  }
98 
99  if(d_root_dir != "UNUSED"){
100  // TODO access() or stat() would test for existence faster. jhrg 2.25.18
101  DIR *dip = opendir(d_root_dir.c_str());
102  if (dip == NULL) {
103  string serr = "BESCatalogDirectory - root directory " + d_root_dir + " does not exist";
104  throw BESNotFoundError(serr, __FILE__, __LINE__);
105  }
106  closedir(dip);
107  }
108 
109  found = false;
110  key = (string) "BES.Catalog." + n + ".Exclude";
111  vector<string> vals;
112  TheBESKeys::TheKeys()->get_values(key, vals, found);
113  vector<string>::iterator ei = vals.begin();
114  vector<string>::iterator ee = vals.end();
115  for (; ei != ee; ei++) {
116  string e_str = (*ei);
117  if (!e_str.empty() && e_str != ";") BESUtil::explode(';', e_str, d_exclude);
118  }
119 
120  key = (string) "BES.Catalog." + n + ".Include";
121  vals.clear();
122  TheBESKeys::TheKeys()->get_values(key, vals, found);
123  vector<string>::iterator ii = vals.begin();
124  vector<string>::iterator ie = vals.end();
125  for (; ii != ie; ii++) {
126  string i_str = (*ii);
127  if (!i_str.empty() && i_str != ";") BESUtil::explode(';', i_str, d_include);
128  }
129 
130  key = "BES.Catalog." + n + ".TypeMatch";
131  list<string> match_list;
132  vals.clear();
133  TheBESKeys::TheKeys()->get_values(key, vals, found);
134  if (strict && (!found || vals.size() == 0)) {
135  string s = key + " not defined in key file";
136  throw BESInternalError(s, __FILE__, __LINE__);
137  }
138  vector<string>::iterator vi = vals.begin();
139  vector<string>::iterator ve = vals.end();
140  for (; vi != ve; vi++) {
141  BESUtil::explode(';', (*vi), match_list);
142  }
143 
144  list<string>::iterator mli = match_list.begin();
145  list<string>::iterator mle = match_list.end();
146  for (; mli != mle; mli++) {
147  if (!((*mli).empty()) && *(mli) != ";") {
148  list<string> amatch;
149  BESUtil::explode(':', (*mli), amatch);
150  if (amatch.size() != 2) {
151  string s = (string) "Catalog type match malformed, " + "looking for type:regexp;[type:regexp;]";
152  throw BESInternalError(s, __FILE__, __LINE__);
153  }
154  list<string>::iterator ami = amatch.begin();
155  handler_regex newval;
156  newval.handler = (*ami);
157  ami++;
158  newval.regex = (*ami);
159  d_match_list.push_back(newval);
160  }
161  }
162 
163  key = (string) "BES.Catalog." + n + ".FollowSymLinks";
164  string s_str;
165  TheBESKeys::TheKeys()->get_value(key, s_str, found);
166  s_str = BESUtil::lowercase(s_str);
167  if (s_str == "yes" || s_str == "on" || s_str == "true") {
168  d_follow_syms = true;
169  }
170 }
171 
183 bool BESCatalogUtils::include(const string &inQuestion) const
184 {
185  bool toInclude = false;
186 
187  // First check the file against the include list. If the file should be
188  // included then check the exclude list to see if there are exceptions
189  // to the include list.
190  if (d_include.size() == 0) {
191  toInclude = true;
192  }
193  else {
194  list<string>::const_iterator i_iter = d_include.begin();
195  list<string>::const_iterator i_end = d_include.end();
196  for (; i_iter != i_end; i_iter++) {
197  string reg = *i_iter;
198  if (!reg.empty()) {
199  try {
200  // must match exactly, meaning result is = to length of string
201  // in question
202  BESRegex reg_expr(reg.c_str());
203  if (reg_expr.match(inQuestion.c_str(), inQuestion.length())
204  == static_cast<int>(inQuestion.length())) {
205  toInclude = true;
206  }
207  }
208  catch (BESError &e) {
209  string serr = (string) "Unable to get catalog information, "
210  + "malformed Catalog Include parameter " + "in bes configuration file around " + reg + ": "
211  + e.get_message();
212  throw BESInternalError(serr, __FILE__, __LINE__);
213  }
214  }
215  }
216  }
217 
218  if (toInclude == true) {
219  if (exclude(inQuestion)) {
220  toInclude = false;
221  }
222  }
223 
224  return toInclude;
225 }
226 
234 bool BESCatalogUtils::exclude(const string &inQuestion) const
235 {
236  list<string>::const_iterator e_iter = d_exclude.begin();
237  list<string>::const_iterator e_end = d_exclude.end();
238  for (; e_iter != e_end; e_iter++) {
239  string reg = *e_iter;
240  if (!reg.empty()) {
241  try {
242  BESRegex reg_expr(reg.c_str());
243  if (reg_expr.match(inQuestion.c_str(), inQuestion.length()) == static_cast<int>(inQuestion.length())) {
244  return true;
245  }
246  }
247  catch (BESError &e) {
248  string serr = (string) "Unable to get catalog information, " + "malformed Catalog Exclude parameter "
249  + "in bes configuration file around " + reg + ": " + e.get_message();
250  throw BESInternalError(serr, __FILE__, __LINE__);
251  }
252  }
253  }
254  return false;
255 }
256 
262 BESCatalogUtils::match_citer BESCatalogUtils::match_list_begin() const
263 {
264  return d_match_list.begin();
265 }
266 
272 BESCatalogUtils::match_citer BESCatalogUtils::match_list_end() const
273 {
274  return d_match_list.end();
275 }
276 
287 unsigned int BESCatalogUtils::get_entries(DIR *dip, const string &fullnode, const string &use_node,
288  BESCatalogEntry *entry, bool dirs_only)
289 {
290  unsigned int cnt = 0;
291 
292  struct stat cbuf;
293  int statret = stat(fullnode.c_str(), &cbuf);
294  if (statret != 0) {
295  if (errno == ENOENT) { // ENOENT means that the path or part of the path does not exist
296  char *s_err = strerror(errno);
297  throw BESNotFoundError((s_err) ? string(s_err) : string("Node ") + use_node + " does not exist", __FILE__,
298  __LINE__);
299  }
300  // any other error means that access is denied for some reason
301  else {
302  char *s_err = strerror(errno);
303  throw BESNotFoundError((s_err) ? string(s_err) : string("Access denied for node ") + use_node, __FILE__,
304  __LINE__);
305  }
306  }
307 
308  struct dirent *dit;
309  while ((dit = readdir(dip)) != NULL) {
310  string dirEntry = dit->d_name;
311  if (dirEntry == "." || dirEntry == "..") {
312  continue;
313  }
314 
315  string fullPath = fullnode + "/" + dirEntry;
316 
317  // Skip this dir entry if it is a sym link and follow links is false
318  if (follow_sym_links() == false) {
319  struct stat lbuf;
320  (void) lstat(fullPath.c_str(), &lbuf);
321  if (S_ISLNK(lbuf.st_mode))
322  continue;
323  }
324 
325  // look at the mode and determine if this is a
326  // directory or a regular file. If it is not
327  // accessible, the stat fails, is not a directory
328  // or regular file, then simply do not include it.
329  struct stat buf;
330  statret = stat(fullPath.c_str(), &buf);
331  if (statret == 0 && S_ISDIR(buf.st_mode)) {
332  if (exclude(dirEntry) == false) {
333  BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
334 
335  bes_add_stat_info(curr_entry, buf);
336 
337  entry->add_entry(curr_entry);
338 
339  // we don't go further than this, so we need
340  // to add a blank node here so that we know
341  // it's a node (collection)
342  BESCatalogEntry *blank_entry = new BESCatalogEntry(".blank", entry->get_catalog());
343  curr_entry->add_entry(blank_entry);
344  }
345  }
346  else if (statret == 0 && S_ISREG(buf.st_mode)) {
347  if (!dirs_only && include(dirEntry)) {
348  BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
349  bes_add_stat_info(curr_entry, buf);
350 
351  list<string> services;
352  // TODO use the d_utils object? jhrg 2.26.18
353  isData(fullPath, d_name, services);
354  curr_entry->set_service_list(services);
355 
356  bes_add_stat_info(curr_entry, buf);
357 
358  entry->add_entry(curr_entry);
359  }
360  }
361  } // end of the while loop
362 
363  // TODO this always return zero. FIXME jhrg 2.26.18
364  return cnt;
365 }
366 
367 void BESCatalogUtils::display_entry(BESCatalogEntry *entry, BESInfo *info)
368 {
369  string defcatname = BESCatalogList::TheCatalogList()->default_catalog_name();
370 
371  // start with the external entry
372  map<string, string> props;
373  if (entry->get_catalog() == defcatname) {
374  props["name"] = entry->get_name();
375  }
376  else {
377  string name = entry->get_catalog() + "/";
378  if (entry->get_name() != "/") {
379  name = name + entry->get_name();
380  }
381  props["name"] = name;
382  }
383  props["catalog"] = entry->get_catalog();
384  props["size"] = entry->get_size();
385  props["lastModified"] = entry->get_mod_date() + "T" + entry->get_mod_time();
386  if (entry->is_collection()) {
387  props["node"] = "true";
388  ostringstream strm;
389  strm << entry->get_count();
390  props["count"] = strm.str();
391  }
392  else {
393  props["node"] = "false";
394  }
395  info->begin_tag("dataset", &props);
396 
397  list<string> services = entry->get_service_list();
398  if (services.size()) {
399  list<string>::const_iterator si = services.begin();
400  list<string>::const_iterator se = services.end();
401  for (; si != se; si++) {
402  info->add_tag("serviceRef", (*si));
403  }
404  }
405 }
406 
421 std::string
422 BESCatalogUtils::get_handler_name(const std::string &item) const
423 {
424  for (BESCatalogUtils::match_citer i = match_list_begin(), e = match_list_end(); i != e; ++i) {
425  BESRegex expr((*i).regex.c_str());
426  if (expr.match(item.c_str(), item.length()) == (int)item.length()) {
427  return (*i).handler;
428  }
429  }
430 
431  return "";
432 }
433 
446 bool
447 BESCatalogUtils::is_data(const std::string &item) const
448 {
449  for (BESCatalogUtils::match_citer i = match_list_begin(), e = match_list_end(); i != e; ++i) {
450  BESRegex expr((*i).regex.c_str());
451  if (expr.match(item.c_str(), item.length()) == (int)item.length()) {
452  return true;
453  }
454  }
455 
456  return false;
457 }
458 
466 void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, const string &fullnode)
467 {
468  struct stat cbuf;
469  int statret = stat(fullnode.c_str(), &cbuf);
470  if (statret == 0) {
471  bes_add_stat_info(entry, cbuf);
472  }
473 }
474 
475 void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, struct stat &buf)
476 {
477  off_t sz = buf.st_size;
478  entry->set_size(sz);
479 
480  // %T = %H:%M:%S
481  // %F = %Y-%m-%d
482  time_t mod = buf.st_mtime;
483  struct tm *stm = gmtime(&mod);
484  char mdate[64];
485  strftime(mdate, 64, "%Y-%m-%d", stm);
486  char mtime[64];
487  strftime(mtime, 64, "%T", stm);
488 
489  ostringstream sdt;
490  sdt << mdate;
491  entry->set_mod_date(sdt.str());
492 
493  ostringstream stt;
494  stt << mtime;
495  entry->set_mod_time(stt.str());
496 }
497 
498 bool BESCatalogUtils::isData(const string &inQuestion, const string &catalog, list<string> &services)
499 {
500  BESContainerStorage *store = BESContainerStorageList::TheList()->find_persistence(catalog);
501  if (!store) return false;
502 
503  return store->isData(inQuestion, services);
504 }
505 
506 void BESCatalogUtils::dump(ostream &strm) const
507 {
508  strm << BESIndent::LMarg << "BESCatalogUtils::dump - (" << (void *) this << ")" << endl;
509  BESIndent::Indent();
510 
511  strm << BESIndent::LMarg << "root directory: " << d_root_dir << endl;
512 
513  if (d_include.size()) {
514  strm << BESIndent::LMarg << "include list:" << endl;
515  BESIndent::Indent();
516  list<string>::const_iterator i_iter = d_include.begin();
517  list<string>::const_iterator i_end = d_include.end();
518  for (; i_iter != i_end; i_iter++) {
519  if (!(*i_iter).empty()) {
520  strm << BESIndent::LMarg << *i_iter << endl;
521  }
522  }
523  BESIndent::UnIndent();
524  }
525  else {
526  strm << BESIndent::LMarg << "include list: empty" << endl;
527  }
528 
529  if (d_exclude.size()) {
530  strm << BESIndent::LMarg << "exclude list:" << endl;
531  BESIndent::Indent();
532  list<string>::const_iterator e_iter = d_exclude.begin();
533  list<string>::const_iterator e_end = d_exclude.end();
534  for (; e_iter != e_end; e_iter++) {
535  if (!(*e_iter).empty()) {
536  strm << BESIndent::LMarg << *e_iter << endl;
537  }
538  }
539  BESIndent::UnIndent();
540  }
541  else {
542  strm << BESIndent::LMarg << "exclude list: empty" << endl;
543  }
544 
545  if (d_match_list.size()) {
546  strm << BESIndent::LMarg << "type matches:" << endl;
547  BESIndent::Indent();
548  BESCatalogUtils::match_citer i = d_match_list.begin();
549  BESCatalogUtils::match_citer ie = d_match_list.end();
550  for (; i != ie; i++) {
551  handler_regex match = (*i);
552  strm << BESIndent::LMarg << match.handler << " : " << match.regex << endl;
553  }
554  BESIndent::UnIndent();
555  }
556  else {
557  strm << BESIndent::LMarg << " type matches: empty" << endl;
558  }
559 
560  if (d_follow_syms) {
561  strm << BESIndent::LMarg << " follow symbolic links: on" << endl;
562  }
563  else {
564  strm << BESIndent::LMarg << " follow symbolic links: off" << endl;
565  }
566 
567  BESIndent::UnIndent();
568 }
569 
570 #if 0
572 BESCatalogUtils::Utils(const string &cat_name)
573 {
574  BESCatalogUtils *utils = BESCatalogUtils::_instances[cat_name];
575  if (!utils) {
576  utils = new BESCatalogUtils(cat_name);
577  BESCatalogUtils::_instances[cat_name] = utils;
578  }
579  return utils;
580 }
581 #endif
582 
583 
584 #if 0
585 // Added 12/24/12
586 void BESCatalogUtils::delete_all_catalogs()
587 {
588  map<string, BESCatalogUtils*>::iterator i = BESCatalogUtils::_instances.begin();
589  map<string, BESCatalogUtils*>::iterator e = BESCatalogUtils::_instances.end();
590  while (i != e) {
591  delete (*i++).second;
592  }
593 }
594 
595 #endif
596 
BESCatalogUtils::dump
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
Definition: BESCatalogUtils.cc:506
BESCatalogUtils::is_data
bool is_data(const std::string &item) const
is there a handler that can process this
Definition: BESCatalogUtils.cc:447
BESContainerStorage
provides persistent storage for data storage information represented by a container.
Definition: BESContainerStorage.h:67
BESCatalogUtils::include
virtual bool include(const std::string &inQuestion) const
Should this file/directory be included in the catalog?
Definition: BESCatalogUtils.cc:183
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
BESCatalogUtils::exclude
virtual bool exclude(const std::string &inQuestion) const
Should this file/directory be excluded in the catalog?
Definition: BESCatalogUtils.cc:234
BESCatalogUtils
Definition: BESCatalogUtils.h:61
BESError::get_message
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
BESInfo
informational response object
Definition: BESInfo.h:63
BESCatalogList::TheCatalogList
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Definition: BESCatalogList.cc:81
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
BESCatalogUtils::get_handler_name
std::string get_handler_name(const std::string &item) const
Find the handler name that will process.
Definition: BESCatalogUtils.cc:422
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
BESContainerStorageList::find_persistence
virtual BESContainerStorage * find_persistence(const std::string &persist_name)
find the persistence store with the given name
Definition: BESContainerStorageList.cc:212
BESCatalogUtils::get_entries
virtual unsigned int get_entries(DIR *dip, const std::string &fullnode, const std::string &use_node, BESCatalogEntry *entry, bool dirs_only)
Definition: BESCatalogUtils.cc:287
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
TheBESKeys::get_values
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:303
BESCatalogEntry
Definition: BESCatalogEntry.h:46
BESUtil::explode
static void explode(char delim, const std::string &str, std::list< std::string > &values)
Definition: BESUtil.cc:561
BESContainerStorage::isData
virtual bool isData(const std::string &inQuestion, std::list< std::string > &provides)=0
determine if the given container is data and what services are available for it
BESUtil::lowercase
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
BESRegex
Definition: BESRegex.h:41
BESError
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
BESCatalogList::default_catalog_name
virtual std::string default_catalog_name() const
The name of the default catalog.
Definition: BESCatalogList.h:116
BESRegex::match
int match(const char *s, int len, int pos=0)
Does the pattern match.
Definition: BESRegex.cc:105