bes  Updated for version 3.20.6
BESDapFunctionResponseCache.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of Hyrax, a C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2016 OPeNDAP, Inc.
7 // Author: Nathan David Potter <ndp@opendap.org>
8 // James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG
29 
30 #include <cstdio>
31 #include <unistd.h>
32 #include <sys/stat.h>
33 
34 #include <iostream>
35 #include <string>
36 #include <fstream>
37 #include <sstream>
38 
39 #ifdef HAVE_TR1_FUNCTIONAL
40 #include <tr1/functional>
41 #endif
42 
43 #include <DDS.h>
44 #include <ConstraintEvaluator.h>
45 #include <DDXParserSAX2.h>
46 
47 #include <XDRStreamMarshaller.h>
48 #include <XDRStreamUnMarshaller.h>
49 #include <XDRFileUnMarshaller.h>
50 
51 #include <D4StreamMarshaller.h>
52 #include <D4StreamUnMarshaller.h>
53 
54 #include <Sequence.h> // We have to special-case these; see read_data_ddx()
55 
56 #include <debug.h>
57 #include <mime_util.h> // for last_modified_time() and rfc_822_date()
58 #include <util.h>
59 
60 #include "CacheTypeFactory.h"
61 #include "CacheMarshaller.h"
62 #include "CacheUnMarshaller.h"
63 
64 #include "BESDapFunctionResponseCache.h"
65 #include "BESDapResponseBuilder.h"
66 #include "BESInternalError.h"
67 
68 #include "BESUtil.h"
69 #include "TheBESKeys.h"
70 #include "BESLog.h"
71 #include "BESDebug.h"
72 
73 #define DEBUG_KEY "response_cache"
74 
75 #ifdef HAVE_TR1_FUNCTIONAL
76 #define HASH_OBJ std::tr1::hash
77 #else
78 #define HASH_OBJ std::hash
79 #endif
80 
81 using namespace std;
82 using namespace libdap;
83 
84 const string DATA_MARK = "--DATA:";
85 
86 // If the size of the constraint is larger than this value, don't cache the response.
87 const unsigned int max_cacheable_ce_len = 4096;
88 const unsigned int max_collisions = 50; // It's hard to believe this could happen
89 
90 const unsigned int default_cache_size = 20; // 20 GB
91 const string default_cache_prefix = "rc";
92 const string default_cache_dir = ""; // I'm making the default empty so that no key == no caching. jhrg 9.26.16
93 
94 const string BESDapFunctionResponseCache::PATH_KEY = "DAP.FunctionResponseCache.path";
95 const string BESDapFunctionResponseCache::PREFIX_KEY = "DAP.FunctionResponseCache.prefix";
96 const string BESDapFunctionResponseCache::SIZE_KEY = "DAP.FunctionResponseCache.size";
97 
98 BESDapFunctionResponseCache *BESDapFunctionResponseCache::d_instance = 0;
99 bool BESDapFunctionResponseCache::d_enabled = true;
100 
101 unsigned long BESDapFunctionResponseCache::get_cache_size_from_config()
102 {
103  bool found;
104  string size;
105  unsigned long size_in_megabytes = default_cache_size;
106  TheBESKeys::TheKeys()->get_value(SIZE_KEY, size, found);
107  if (found) {
108  BESDEBUG(DEBUG_KEY,
109  "BESDapFunctionResponseCache::getCacheSizeFromConfig(): Located BES key " << SIZE_KEY<< "=" << size << endl);
110  istringstream iss(size);
111  iss >> size_in_megabytes;
112  }
113 
114  return size_in_megabytes;
115 }
116 
117 string BESDapFunctionResponseCache::get_cache_prefix_from_config()
118 {
119  bool found;
120  string prefix = default_cache_prefix;
121  TheBESKeys::TheKeys()->get_value(PREFIX_KEY, prefix, found);
122  if (found) {
123  BESDEBUG(DEBUG_KEY,
124  "BESDapFunctionResponseCache::getCachePrefixFromConfig(): Located BES key " << PREFIX_KEY<< "=" << prefix << endl);
125  prefix = BESUtil::lowercase(prefix);
126  }
127 
128  return prefix;
129 }
130 
131 // If the cache prefix is the empty string, the cache is turned off.
132 string BESDapFunctionResponseCache::get_cache_dir_from_config()
133 {
134  bool found;
135 
136  string cacheDir = default_cache_dir;
137  TheBESKeys::TheKeys()->get_value(PATH_KEY, cacheDir, found);
138  if (found) {
139  BESDEBUG(DEBUG_KEY,
140  "BESDapFunctionResponseCache::getCacheDirFromConfig(): Located BES key " << PATH_KEY<< "=" << cacheDir << endl);
141  }
142 
143  return cacheDir;
144 }
145 
164 BESDapFunctionResponseCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
165 {
166  if (d_enabled && d_instance == 0) {
167  if (!cache_dir.empty() && dir_exists(cache_dir)) {
168  d_instance = new BESDapFunctionResponseCache(cache_dir, prefix, size);
169  d_enabled = d_instance->cache_enabled();
170  if(!d_enabled){
171  delete d_instance;
172  d_instance = NULL;
173  BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
174  "Cache is DISABLED"<< endl);
175  }
176  else {
177  #ifdef HAVE_ATEXIT
178  atexit(delete_instance);
179  #endif
180  BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
181  "Cache is ENABLED"<< endl);
182  }
183  }
184  }
185 
186  BESDEBUG(DEBUG_KEY,
187  "BESDapFunctionResponseCache::get_instance(dir,prefix,size) - d_instance: " << d_instance << endl);
188 
189  return d_instance;
190 }
191 
193 BESDapFunctionResponseCache::get_instance()
194 {
195  if (d_enabled && d_instance == 0) {
196  string cache_dir = get_cache_dir_from_config();
197  if (!cache_dir.empty() && dir_exists(cache_dir)) {
198  d_instance = new BESDapFunctionResponseCache(get_cache_dir_from_config(), get_cache_prefix_from_config(),
199  get_cache_size_from_config());
200  d_enabled = d_instance->cache_enabled();
201  if(!d_enabled){
202  delete d_instance;
203  d_instance = NULL;
204  BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
205  "Cache is DISABLED"<< endl);
206  }
207  else {
208  #ifdef HAVE_ATEXIT
209  atexit(delete_instance);
210  #endif
211  BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
212  "Cache is ENABLED"<< endl);
213  }
214  }
215  }
216 
217  BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::get_instance() - d_instance: " << (void *) d_instance << endl);
218 
219  return d_instance;
220 }
222 
232 bool BESDapFunctionResponseCache::is_valid(const string &cache_file_name, const string &dataset)
233 {
234  // If the cached response is zero bytes in size, it's not valid. This is true
235  // because a DAP data object, even if it has no data still has a metadata part.
236  // jhrg 10/20/15
237 
238  off_t entry_size = 0;
239  time_t entry_time = 0;
240  struct stat buf;
241  if (stat(cache_file_name.c_str(), &buf) == 0) {
242  entry_size = buf.st_size;
243  entry_time = buf.st_mtime;
244  }
245  else {
246  return false;
247  }
248 
249  if (entry_size == 0) return false;
250 
251  time_t dataset_time = entry_time;
252  if (stat(dataset.c_str(), &buf) == 0) {
253  dataset_time = buf.st_mtime;
254  }
255 
256  // Trick: if the d_dataset is not a file, stat() returns error and
257  // the times stay equal and the code uses the cache entry.
258 
259  // TODO Fix this so that the code can get a LMT from the correct handler.
260  if (dataset_time > entry_time) return false;
261 
262  return true;
263 }
264 
265 string BESDapFunctionResponseCache::get_resource_id(DDS *dds, const string &constraint)
266 {
267  return dds->filename() + "#" + constraint;
268 }
269 
270 bool BESDapFunctionResponseCache::can_be_cached(DDS *dds, const string &constraint)
271 {
272  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " constraint + dds->filename() length: "
273  << constraint.length() + dds->filename().size() << endl);
274 
275  return (constraint.length() + dds->filename().size() <= max_cacheable_ce_len);
276 }
277 
285 string BESDapFunctionResponseCache::get_hash_basename(const string &resource_id)
286 {
287  // Get a hash function for strings
288  HASH_OBJ<string> str_hash;
289  size_t hashValue = str_hash(resource_id);
290  stringstream hashed_id;
291  hashed_id << hashValue;
292  string cache_file_name = get_cache_directory();
293  cache_file_name.append("/").append(get_cache_file_prefix()).append(hashed_id.str());
294 
295  return cache_file_name;
296 }
297 
319 DDS *
320 BESDapFunctionResponseCache::get_or_cache_dataset(DDS *dds, const string &constraint)
321 {
322  // Build the response_id. Since the response content is a function of both the dataset AND the constraint,
323  // glue them together to get a unique id for the response.
324  string resourceId = dds->filename() + "#" + constraint;
325 
326  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resourceId: '" << resourceId << "'" << endl);
327 
328  // Get a hash function for strings
329  HASH_OBJ<string> str_hash;
330 
331  // Use the hash function to hash the resourceId.
332  size_t hashValue = str_hash(resourceId);
333  stringstream hashed_id;
334  hashed_id << hashValue;
335 
336  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " hashed_id: '" << hashed_id.str() << "'" << endl);
337 
338  // Use the parent class's get_cache_file_name() method and its associated machinery to get the file system path for the cache file.
339  // We store it in a variable called basename because the value is later extended as part of the collision avoidance code.
340  string cache_file_name = BESFileLockingCache::get_cache_file_name(hashed_id.str(), false);
341 
342  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cache_file_name: '" << cache_file_name << "'" << endl);
343 
344  // Does the cached dataset exist? if yes, ret_dds points to it. If no,
345  // cache_file_name is updated to be the correct name for write_dataset_
346  // to_cache().
347  DDS *ret_dds = 0;
348  if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
349  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file: " << cache_file_name << endl);
350  ret_dds->filename(dds->filename());
351  }
352  else if ((ret_dds = write_dataset_to_cache(dds, resourceId, constraint, cache_file_name))) {
353  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data written to cache file: " << cache_file_name << endl);
354  }
355  // get_read_lock() returns immediately if the file does not exist,
356  // but blocks waiting to get a shared lock if the file does exist.
357  else if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
358  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file (2nd try): " << cache_file_name << endl);
359  ret_dds->filename(dds->filename());
360  }
361 
362  BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Used cache_file_name: " << cache_file_name << " for resource ID: " << resourceId << endl);
363 
364  return ret_dds;
365 }
366 
383 DDS *
384 BESDapFunctionResponseCache::load_from_cache(const string &resource_id, string &cache_file_name)
385 {
386  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resource_id: " << resource_id << endl);
387 
388  DDS *cached_dds = 0; // nullptr
389 
390  unsigned long suffix_counter = 0;
391  bool keep_looking = true;
392  do {
393  if (suffix_counter > max_collisions) {
394  stringstream ss;
395  ss << "Cache error! There are " << suffix_counter << " hash collisions for the resource '" << resource_id
396  << "' And that is a bad bad thing.";
397  throw BESInternalError(ss.str(), __FILE__, __LINE__);
398  }
399 
400  // Build cache_file_name and cache_id_file_name from baseName
401  stringstream cfname;
402  cfname << cache_file_name << "_" << suffix_counter++;
403 
404  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " candidate cache_file_name: " << cfname.str() << endl);
405 
406  int fd; // unused
407  if (!get_read_lock(cfname.str(), fd)) {
408  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " !get_read_lock(cfname.str(), fd): " << fd << endl);
409  // If get_read_lock() returns false, that means the cache file doesn't exist.
410  // Set keep_looking to false and exit the loop.
411  keep_looking = false;
412  // Set the cache file name to the current value of cfname.str() - this is
413  // the name that does not exist and should be used by write_dataset_to_cache()
414  cache_file_name = cfname.str();
415  }
416  else {
417  // If get_read_lock() returns true, the cache file exists; look and see if
418  // it's the correct one. If so, cached_dds will be true and we exit.
419 
420  // Read the first line from the cache file and see if it matches the resource id
421  ifstream cache_file_istream(cfname.str().c_str());
422  char line[max_cacheable_ce_len];
423  cache_file_istream.getline(line, max_cacheable_ce_len);
424  string cached_resource_id;
425  cached_resource_id.assign(line);
426 
427  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cached_resource_id: " << cached_resource_id << endl);
428 
429  if (cached_resource_id.compare(resource_id) == 0) {
430  // WooHoo Cache Hit!
431  BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::load_from_cache() - Cache Hit!" << endl);
432 
433  // non-null value value for cached_dds will exit the loop
434  cached_dds = read_cached_data(cache_file_istream);
435  }
436 
437  unlock_and_close(cfname.str());
438  }
439  } while (!cached_dds && keep_looking);
440 
441  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Cache " << (cached_dds!=0?"HIT":"MISS") << " for: " << cache_file_name << endl);
442 
443  return cached_dds;
444 }
445 
450 DDS *
451 BESDapFunctionResponseCache::read_cached_data(istream &cached_data)
452 {
453  // Build a CachedSequence; all other types are as BaseTypeFactory builds
454  CacheTypeFactory factory;
455  DDS *fdds = new DDS(&factory);
456 
457  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - BEGIN" << endl);
458 
459  // Parse the DDX; throw an exception on error.
460  DDXParser ddx_parser(fdds->get_factory());
461 
462  // Parse the DDX, reading up to and including the next boundary.
463  // Return the CID for the matching data part
464  string data_cid; // Not used. jhrg 5/5/16
465  try {
466  ddx_parser.intern_stream(cached_data, fdds, data_cid, DATA_MARK);
467  }
468  catch (Error &e) { // Catch the libdap::Error and throw BESInternalError
469  throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
470  }
471 
472  CacheUnMarshaller um(cached_data);
473 
474  for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
475  (*i)->deserialize(um, fdds);
476  }
477 
478  // mark everything as read. And 'to send.' That is, make sure that when a response
479  // is retrieved from the cache, all of the variables are marked as 'to be sent.'
480  for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
481  (*i)->set_read_p(true);
482  (*i)->set_send_p(true);
483 
484  // For Sequences, deserialize() will update the 'current row number,' which
485  // is the correct behavior but which will also confuse serialize(). Reset the
486  // current row number here so serialize() can start working from row 0. jhrg 5/13/16
487  // Note: Now uses the recursive version of reset_row_number. jhrg 5/16/16
488  if ((*i)->type() == dods_sequence_c) {
489  static_cast<Sequence*>(*i)->reset_row_number(true);
490  }
491  }
492 
493  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - END." << endl);
494 
495  fdds->set_factory(0); // Make sure there is no left-over cruft in the returned DDS
496 
497  return fdds;
498 }
499 
515 DDS *
516 BESDapFunctionResponseCache::write_dataset_to_cache(DDS *dds, const string &resource_id, const string &func_ce,
517  const string &cache_file_name)
518 {
519  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " BEGIN " << resource_id << ": "
520  << func_ce << ": " << cache_file_name << endl);
521 
522  DDS *fdds = 0; // will hold the return value
523 
524  int fd;
525  if (create_and_lock(cache_file_name, fd)) {
526  // If here, the cache_file_name could not be locked for read access;
527  // try to build it. First make an empty files and get an exclusive lock on them.
528  BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Caching " << resource_id << ", func_ce: " << func_ce << endl);
529 
530  // Get an output stream directed at the locked cache file
531  ofstream cache_file_ostream(cache_file_name.c_str(), ios::out|ios::app|ios::binary);
532  if (!cache_file_ostream.is_open())
533  throw BESInternalError("Could not open '" + cache_file_name + "' to write cached response.", __FILE__, __LINE__);
534 
535  try {
536  // Write the resource_id to the first line of the cache file
537  cache_file_ostream << resource_id << endl;
538 
539  // Evaluate the function
540  ConstraintEvaluator func_eval;
541  func_eval.parse_constraint(func_ce, *dds);
542  fdds = func_eval.eval_function_clauses(*dds);
543 
544  fdds->print_xml_writer(cache_file_ostream, true, "");
545 
546  cache_file_ostream << DATA_MARK << endl;
547 
548  // Define the scope of the StreamMarshaller because for some types it will use
549  // a child thread to send data and it's dtor will wait for that thread to complete.
550  // We want that before we close the output stream (cache_file_stream) jhrg 5/6/16
551  {
552  ConstraintEvaluator new_ce;
553  CacheMarshaller m(cache_file_ostream);
554 
555  for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
556  if ((*i)->send_p()) {
557  (*i)->serialize(new_ce, *fdds, m, false);
558  }
559  }
560  }
561 
562  // Change the exclusive locks on the new file to a shared lock. This keeps
563  // other processes from purging the new file and ensures that the reading
564  // process can use it.
565  exclusive_to_shared_lock(fd);
566 
567  // Now update the total cache size info and purge if needed. The new file's
568  // name is passed into the purge method because this process cannot detect its
569  // own lock on the file.
570  unsigned long long size = update_cache_info(cache_file_name);
571  if (cache_too_big(size)) update_and_purge(cache_file_name);
572 
573  unlock_and_close(cache_file_name);
574  }
575  catch (...) {
576  // Bummer. There was a problem doing The Stuff. Now we gotta clean up.
577  cache_file_ostream.close();
578  this->purge_file(cache_file_name);
579  unlock_and_close(cache_file_name);
580  throw;
581  }
582  }
583 
584  return fdds;
585 }
586 
CacheMarshaller
Marshaller that knows how serialize dap data objects to a disk cache This class can be used with libd...
Definition: CacheMarshaller.h:42
CacheTypeFactory
Definition: CacheTypeFactory.h:41
CacheUnMarshaller
UnMarshaller that knows how to deserialize dap objects.
Definition: CacheUnMarshaller.h:43
BESDapFunctionResponseCache::get_or_cache_dataset
virtual libdap::DDS * get_or_cache_dataset(libdap::DDS *dds, const std::string &constraint)
Return a DDS loaded with data that can be serialized back to a client.
Definition: BESDapFunctionResponseCache.cc:320
libdap
Definition: BESDapFunctionResponseCache.h:35
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
BESFileLockingCache::get_cache_file_name
virtual std::string get_cache_file_name(const std::string &src, bool mangle=true)
Definition: BESFileLockingCache.cc:451
Error
BESUtil::lowercase
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
BESDapFunctionResponseCache
Cache the results from server functions.
Definition: BESDapFunctionResponseCache.h:72