OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESDapResponseCache.cc
Go to the documentation of this file.
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2011 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG
28 
29 #include <sys/stat.h>
30 
31 #include <iostream>
32 #include <string>
33 #include <fstream>
34 #include <sstream>
35 
36 #include <DDS.h>
37 #include <ConstraintEvaluator.h>
38 #include <DDXParserSAX2.h>
39 #include <XDRStreamMarshaller.h>
40 #include <XDRStreamUnMarshaller.h>
41 //<XDRFileUnMarshaller.h>
42 #include <debug.h>
43 #include <mime_util.h> // for last_modified_time() and rfc_822_date()
44 #include <util.h>
45 
46 
47 #include "BESDapResponseCache.h"
48 #include "BESDapResponseBuilder.h"
49 #include "BESInternalError.h"
50 
51 #include "BESUtil.h"
52 #include "TheBESKeys.h"
53 #include "BESDebug.h"
54 
55 #define CRLF "\r\n"
56 
57 using namespace std;
58 using namespace libdap;
59 
60 
61 BESDapResponseCache *BESDapResponseCache::d_instance = 0;
62 const string BESDapResponseCache::PATH_KEY = "DAP.ResponseCache.path";
63 const string BESDapResponseCache::PREFIX_KEY = "DAP.ResponseCache.prefix";
64 const string BESDapResponseCache::SIZE_KEY = "DAP.ResponseCache.size";
65 
67 
68  bool found;
69  string size;
70  unsigned long size_in_megabytes = 0;
71  TheBESKeys::TheKeys()->get_value( SIZE_KEY, size, found ) ;
72  if( found ) {
73  BESDEBUG("cache", "In BESDapResponseCache::getDefaultCacheSize(): Located BES key " <<
74  SIZE_KEY<< "=" << size << endl);
75  istringstream iss(size);
76  iss >> size_in_megabytes;
77  }
78  else {
79  string msg = "[ERROR] BESDapResponseCache::getCacheSize() - The BES Key " + SIZE_KEY + " is not set! It MUST be set to utilize the DAP response cache. ";
80  BESDEBUG("cache", msg);
81  throw BESInternalError(msg , __FILE__, __LINE__);
82  }
83  return size_in_megabytes;
84 }
85 
87  bool found;
88  string prefix = "";
89  TheBESKeys::TheKeys()->get_value( PREFIX_KEY, prefix, found ) ;
90  if( found ) {
91  BESDEBUG("cache", "In BESDapResponseCache::getDefaultCachePrefix(): Located BES key " <<
92  PREFIX_KEY<< "=" << prefix << endl);
93  prefix = BESUtil::lowercase( prefix ) ;
94  }
95  else {
96  string msg = "[ERROR] BESDapResponseCache::getCachePrefix() - The BES Key " + PREFIX_KEY + " is not set! It MUST be set to utilize the DAP response cache. ";
97  BESDEBUG("cache", msg);
98  throw BESInternalError(msg , __FILE__, __LINE__);
99  }
100 
101  return prefix;
102 }
103 
105  bool found;
106 
107  string cacheDir = "";
108  TheBESKeys::TheKeys()->get_value( PATH_KEY, cacheDir, found ) ;
109  if( found ) {
110  BESDEBUG("cache", "In BESDapResponseCache::getCachePrefix(): Located BES key " <<
111  PATH_KEY<< "=" << cacheDir << endl);
112  cacheDir = BESUtil::lowercase( cacheDir ) ;
113  }
114  else {
115  string msg = "[ERROR] BESDapResponseCache::getCacheDir() - The BES Key " + PATH_KEY + " is not set! It MUST be set to utilize the DAP response cache. ";
116  BESDEBUG("cache", msg);
117  throw BESInternalError(msg , __FILE__, __LINE__);
118  }
119  return cacheDir;
120 }
121 
122 
123 BESDapResponseCache::BESDapResponseCache(){
124  BESDEBUG("cache", "In BESDapResponseCache::BESDapResponseCache()" << endl);
125 
126  string cacheDir = getCacheDirFromConfig();
127  string prefix = getCachePrefixFromConfig();
128  unsigned long size_in_megabytes = getCacheSizeFromConfig();
129 
130  BESDEBUG("cache", "BESDapResponseCache() - Cache config params: " << cacheDir << ", " << prefix << ", " << size_in_megabytes << endl);
131 
132  // cerr << endl << "***** BESDapResponseCache::BESDapResponseCache() - Read cache params: " << path << ", " << prefix << ", " << size << endl;
133 
134  // The required params must be present. If initialize() is not called,
135  // then d_cache will stay null and is_available() will return false.
136  // Also, the directory 'path' must exist, or d_cache will be null.
137  if (!cacheDir.empty() && size_in_megabytes > 0)
138  initialize(cacheDir, prefix, size_in_megabytes);
139 
140  BESDEBUG("cache", "Leaving BESDapResponseCache::BESDapResponseCache()" << endl);
141 }
142 
143 
157 BESDapResponseCache::BESDapResponseCache(const string &cache_dir, const string &prefix, unsigned long long size): BESFileLockingCache(cache_dir,prefix,size) {
158 
159 }
160 
161 
174 BESDapResponseCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
175 {
176  if (d_instance == 0){
177  if(dir_exists(cache_dir)){
178  try {
179  d_instance = new BESDapResponseCache(cache_dir, prefix, size);
180  }
181  catch(BESInternalError &bie){
182  BESDEBUG("cache", "BESDapResponseCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
183  }
184  }
185  }
186  return d_instance;
187 }
188 
194 {
195  if (d_instance == 0) {
196  if(dir_exists(getCacheDirFromConfig())){
197  try {
198  d_instance = new BESDapResponseCache();
199  }
200  catch(BESInternalError &bie){
201  BESDEBUG("cache", "BESDapResponseCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
202  }
203  }
204  }
205 
206  return d_instance;
207 }
208 
209 
210 
211 void BESDapResponseCache::delete_instance() {
212  BESDEBUG("cache","BESDapResponseCache::delete_instance() - Deleting singleton BESDapResponseCache instance." << endl);
213  cerr << "BESDapResponseCache::delete_instance() - Deleting singleton BESDapResponseCache instance. d_instance="<< d_instance << endl;
214  delete d_instance;
215  d_instance = 0;
216 }
217 
218 
219 const string chars_excluded_from_filenames = "<>=,/()\"\'";
227 static string
228 build_cache_file_name(const string &dataset, const string &ce)
229 {
230  BESDEBUG("cache", "build_cache_file_name: dataset: " << dataset << ", ce: " << ce << endl);
231 
232  string name = dataset + "#" + ce;
233  string::size_type pos = name.find_first_of(chars_excluded_from_filenames);
234  while (pos != string::npos) {
235  name.replace(pos, 1, "#", 1);
236  pos = name.find_first_of(chars_excluded_from_filenames);
237  }
238 
239  BESDEBUG("cache", "build_cache_file_name: name: " << name << endl);
240 
241  return name;
242 }
243 
253 bool BESDapResponseCache::is_valid(const string &cache_file_name, const string &dataset)
254 {
255  // If the cached response is zero bytes in size, it's not valid.
256  // (hmmm...)
257 
258  off_t entry_size = 0;
259  time_t entry_time = 0;
260  struct stat buf;
261  if (stat(cache_file_name.c_str(), &buf) == 0) {
262  entry_size = buf.st_size;
263  entry_time = buf.st_mtime;
264  }
265  else {
266  return false;
267  }
268 
269  if (entry_size == 0)
270  return false;
271 
272  time_t dataset_time = entry_time;
273  if (stat(dataset.c_str(), &buf) == 0) {
274  dataset_time = buf.st_mtime;
275  }
276 
277  // Trick: if the d_dataset is not a file, stat() returns error and
278  // the times stay equal and the code uses the cache entry.
279 
280  // TODO Fix this so that the code can get a LMT from the correct
281  // handler.
282  if (dataset_time > entry_time)
283  return false;
284 
285  return true;
286 }
287 
299 void BESDapResponseCache::read_data_from_cache(const string &cache_file_name, DDS *fdds)
300 {
301  BESDEBUG("cache", "Opening cache file: " << cache_file_name << endl);
302  ifstream data(cache_file_name.c_str());
303 
304  // Rip off the MIME headers from the response if they are present
305  string mime = get_next_mime_header(data);
306  while (!mime.empty()) {
307  mime = get_next_mime_header(data);
308  }
309 
310  // Parse the DDX; throw an exception on error.
311  DDXParser ddx_parser(fdds->get_factory());
312 
313  // Read the MPM boundary and then read the subsequent headers
314  string boundary = read_multipart_boundary(data);
315  BESDEBUG("cache", "MPM Boundary: " << boundary << endl);
316 
317  read_multipart_headers(data, "text/xml", dap4_ddx);
318 
319  BESDEBUG("cache", "Read the multipart haeaders" << endl);
320 
321  // Parse the DDX, reading up to and including the next boundary.
322  // Return the CID for the matching data part
323  string data_cid;
324  try {
325  ddx_parser.intern_stream(data, fdds, data_cid, boundary);
326  BESDEBUG("cache", "Dataset name: " << fdds->get_dataset_name() << endl);
327  }
328  catch(Error &e) {
329  BESDEBUG("cache", "DDX Parser Error: " << e.get_error_message() << endl);
330  throw;
331  }
332 
333  // Munge the CID into something we can work with
334  BESDEBUG("cache", "Data CID (before): " << data_cid << endl);
335  data_cid = cid_to_header_value(data_cid);
336  BESDEBUG("cache", "Data CID (after): " << data_cid << endl);
337 
338  // Read the data part's MPM part headers (boundary was read by
339  // DDXParse::intern)
340  read_multipart_headers(data, "application/octet-stream", dap4_data, data_cid);
341 
342  // Now read the data
343 
344  // XDRFileUnMarshaller um(data);
345  XDRStreamUnMarshaller um(data);
346  for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
347  (*i)->deserialize(um, fdds);
348  }
349 }
350 
355 DDS *
356 BESDapResponseCache::get_cached_data_ddx(const string &cache_file_name, BaseTypeFactory *factory, const string &filename)
357 {
358  BESDEBUG("cache", "Reading cache for " << cache_file_name << endl);
359 
360  DDS *fdds = new DDS(factory);
361 
362  fdds->filename(filename) ;
363  //fdds->set_dataset_name( "function_result_" + name_path(filename) ) ;
364 
365  read_data_from_cache(cache_file_name, fdds);
366 
367  BESDEBUG("cache", "DDS Filename: " << fdds->filename() << endl);
368  BESDEBUG("cache", "DDS Dataset name: " << fdds->get_dataset_name() << endl);
369 
370  fdds->set_factory( 0 ) ;
371 
372  // mark everything as read. and send. That is, make sure that when a response
373  // is retrieved from the cache, all of the variables are marked as to be sent
374  DDS::Vars_iter i = fdds->var_begin();
375  while(i != fdds->var_end()) {
376  (*i)->set_read_p( true );
377  (*i++)->set_send_p(true);
378  }
379 
380  return fdds;
381 }
382 
383 
384 
#if 0
// Disabled code, kept for reference.

/**
 * Look up a cached response without creating one.
 *
 * @param filename Dataset file name; used to build the cache file name and
 * for the validity check.
 * @param constraint Constraint expression; used to build the cache file name.
 * @param cache_token Value-result parameter; set to the cache file name.
 * @return The cached DDS, or null when there is no valid cache entry.
 */
DDS *BESDapResponseCache::read_dataset(const string &filename, const string &constraint, string &cache_token)
{
    // These are used for the cached DDS object
    BaseTypeFactory factory;
    // Null unless a valid entry is found, so a miss never returns an
    // indeterminate pointer.
    DDS *fdds = 0;

    // Get the cache filename for this thing. Do not use the default
    // name mangling; instead use what build_cache_file_name() does.
    string cache_file_name = get_cache_file_name(build_cache_file_name(filename, constraint), /*mangle*/false);
    int fd;
    try {
        if (get_read_lock(cache_file_name, fd) && is_valid(cache_file_name, filename)) {
            BESDEBUG("cache", "function ce (change)- cached hit: " << cache_file_name << endl);
            fdds = get_cached_data_ddx(cache_file_name, &factory, filename);
        }
    }
    catch (...) {
        BESDEBUG("cache", "caught exception, unlocking cache and re-throw." << endl );
        // I think this call is not needed. jhrg 10/23/12
        unlock_cache();
        throw;
    }

    cache_token = cache_file_name; // Set this value-result parameter
    return fdds;
}
#endif
419 
450 DDS *BESDapResponseCache::cache_dataset(DDS &dds, const string &constraint, BESDapResponseBuilder *rb, ConstraintEvaluator *eval, string &cache_token)
451 {
452  // These are used for the cached or newly created DDS object
453  BaseTypeFactory factory;
454  DDS *fdds;
455 
456  // Get the cache filename for this thing. Do not use the default
457  // name mangling; instead use what build_cache_file_name() does.
458  string cache_file_name = get_cache_file_name(build_cache_file_name(dds.filename(), constraint), /*mangle*/false);
459  int fd;
460  try {
461  // If the object in the cache is not valid, remove it. The read_lock will
462  // then fail and the code will drop down to the create_and_lock() call.
463  // is_valid() tests for a non-zero object and for d_dateset newer than
464  // the cached object.
465  if (!is_valid(cache_file_name, dds.filename()))
466  purge_file(cache_file_name);
467 
468  if (get_read_lock(cache_file_name, fd)) {
469  BESDEBUG("cache", "function ce (change)- cached hit: " << cache_file_name << endl);
470  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.filename());
471  }
472  else if (create_and_lock(cache_file_name, fd)) {
473  // If here, the cache_file_name could not be locked for read access;
474  // try to build it. First make an empty file and get an exclusive lock on it.
475  BESDEBUG("cache", "function ce - caching " << cache_file_name << ", constraint: " << constraint << endl);
476 
477  fdds = new DDS(dds);
478  eval->parse_constraint(constraint, *fdds);
479 
480  if (eval->function_clauses()) {
481  DDS *temp_fdds = eval->eval_function_clauses(*fdds);
482  delete fdds;
483  fdds = temp_fdds;
484  }
485 
486  ofstream data_stream(cache_file_name.c_str());
487  if (!data_stream)
488  throw InternalErr(__FILE__, __LINE__, "Could not open '" + cache_file_name + "' to write cached response.");
489 
490  string start="dataddx_cache_start", boundary="dataddx_cache_boundary";
491 
492  // Use a ConstraintEvaluator that has not parsed a CE so the code can use
493  // the send method(s)
494  ConstraintEvaluator eval;
495 
496  // Setting the version to 3.2 causes send_data_ddx to write the MIME headers that
497  // the cache expects.
498  fdds->set_dap_version("3.2");
499 
500  // This is a bit of a hack, but it effectively uses ResponseBuilder to write the
501  // cached object/response without calling the machinery in one of the send_*()
502  // methods. Those methods assume they need to evaluate the BESDapResponseBuilder's
503  // CE, which is not necessary and will alter the values of the send_p property
504  // of the DDS's variables.
505  set_mime_multipart(data_stream, boundary, start, dap4_data_ddx, x_plain, last_modified_time(rb->get_dataset_name()));
506  //data_stream << flush;
507  rb->dataset_constraint_ddx(data_stream, *fdds, eval, boundary, start);
508  //data_stream << flush;
509 
510  data_stream << CRLF << "--" << boundary << "--" << CRLF;
511 
512  data_stream.close();
513 
514  // Change the exclusive lock on the new file to a shared lock. This keeps
515  // other processes from purging the new file and ensures that the reading
516  // process can use it.
518 
519  // Now update the total cache size info and purge if needed. The new file's
520  // name is passed into the purge method because this process cannot detect its
521  // own lock on the file.
522  unsigned long long size = update_cache_info(cache_file_name);
523  if (cache_too_big(size))
524  update_and_purge(cache_file_name);
525  }
526  // get_read_lock() returns immediately if the file does not exist,
527  // but blocks waiting to get a shared lock if the file does exist.
528  else if (get_read_lock(cache_file_name, fd)) {
529  BESDEBUG("cache", "function ce - cached hit: " << cache_file_name << endl);
530  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.get_dataset_name());
531  }
532  else {
533  throw InternalErr(__FILE__, __LINE__, "Cache error during function invocation.");
534  }
535  }
536  catch (...) {
537  BESDEBUG("cache", "caught exception, unlocking cache and re-throw." << endl );
538  // I think this call is not needed. jhrg 10/23/12
539  unlock_cache();
540  throw;
541  }
542 
543  cache_token = cache_file_name; // Set this value-result parameter
544  return fdds;
545 }
546 
virtual void unlock_cache()
Unlock the cache info file.
static unsigned long getCacheSizeFromConfig()
virtual libdap::DDS * cache_dataset(libdap::DDS &dds, const std::string &constraint, BESDapResponseBuilder *rb, libdap::ConstraintEvaluator *eval, std::string &cache_token)
Get the cached DDS object.
exception thrown if internal error encountered
virtual bool create_and_lock(const string &target, int &fd)
Create a file in the cache and lock it for write access.
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:179
const string chars_excluded_from_filenames
static BESDapResponseCache * get_instance()
Get the default instance of the BESDapResponseCache object.
This class is used to cache DAP2 response objects.
Implementation of a caching mechanism for compressed data.
virtual string get_message()
get the error message for this exception
Definition: BESError.h:94
virtual void purge_file(const string &file)
Purge a single file from the cache.
virtual bool cache_too_big(unsigned long long current_size) const
look at the cache size; is it too large? Look at the cache size and see if it is too big...
static const string SIZE_KEY
virtual string get_cache_file_name(const string &src, bool mangle=true)
Build the name of the file that will hold the uncompressed data from 'src' in the cache.
static string getCacheDirFromConfig()
static string getCachePrefixFromConfig()
virtual bool get_read_lock(const string &target, int &fd)
Get a read-only lock on the file if it exists.
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:453
virtual void update_and_purge(const string &new_file)
Purge files from the cache.
virtual unsigned long long update_cache_info(const string &target)
Update the cache info file to include 'target'.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
#define CRLF
This class is used to build responses for/by the BES.
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
static const string PREFIX_KEY
virtual void dataset_constraint_ddx(std::ostream &out, libdap::DDS &dds, libdap::ConstraintEvaluator &eval, const std::string &boundary, const std::string &start, bool ce_eval=true)
Build/return the DDX and the BLOB part of the DAP3.x data response.
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:48
virtual std::string get_dataset_name() const
The ``dataset name'' is the filename or other string that the filter program will use to access the d...
static const string PATH_KEY