OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESCache.cc
Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include "config.h"
00034 
00035 #include <unistd.h>  // for unlink
00036 #include <sys/types.h>
00037 #include <sys/stat.h>
00038 #include <dirent.h>
00039 #include <fcntl.h>
00040 
00041 #include <cstring>
00042 #include <cerrno>
00043 #include <iostream>
00044 #include <sstream>
00045 
00046 #include "BESCache.h"
00047 #include "TheBESKeys.h"
00048 #include "BESSyntaxUserError.h"
00049 #include "BESInternalError.h"
00050 #include "BESDebug.h"
00051 
00052 using std::string;
00053 using std::multimap ;
00054 using std::pair ;
00055 using std::greater ;
00056 using std::endl ;
00057 
// Conversion factor: number of bytes in one megabyte. Used to turn the
// configured cache size (megabytes) into bytes.
static const unsigned long long BYTES_PER_MEG = 1048576ULL;

// Max cache size in megs, so we can check the user input and warn.
// 2^64 / 2^20 == 2^44, but 2^44 megabytes is exactly 2^64 bytes, which
// wraps around to 0 when the size is converted to bytes in a 64-bit
// unsigned value. The largest size that can still be converted safely is
// therefore one megabyte less than that.
static const unsigned long long MAX_CACHE_SIZE_IN_MEGABYTES = (1ULL << 44) - 1ULL;
00064 
00065 void 
00066 BESCache::check_ctor_params()
00067 {
00068     if( _cache_dir.empty() )
00069     {
00070         string err = "The cache directory was not specified, must be non-empty";
00071         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00072     }
00073 
00074     struct stat buf;
00075     int statret = stat( _cache_dir.c_str(), &buf ) ;
00076     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00077     {
00078         string err = "The cache directory " + _cache_dir + " does not exist" ;
00079         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00080     }
00081 
00082     if( _prefix.empty() )
00083     {
00084         string err = "The cache file prefix was not specified, must be non-empty" ;
00085         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00086     }
00087 
00088     if( _cache_size_in_megs <= 0 )
00089     {
00090         string err = "The cache size was not specified, must be non-zero" ;
00091         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00092     }
00093 
00094     // If the user specifies a cache that is too large,
00095     // it is a user exception and we should tell them.
00096     // Actually, this may not work since by this
00097     // time we may have already overflowed the variable...
00098     if( _cache_size_in_megs > MAX_CACHE_SIZE_IN_MEGABYTES )
00099       {
00100         _cache_size_in_megs = MAX_CACHE_SIZE_IN_MEGABYTES ;
00101         std::ostringstream msg;
00102         msg << "The specified cache size was larger than the max cache size of: "
00103             << MAX_CACHE_SIZE_IN_MEGABYTES;
00104         throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
00105       }
00106 
00107     BESDEBUG( "bes", "BES Cache: directory " << _cache_dir
00108                      << ", prefix " << _prefix
00109                      << ", max size " << _cache_size_in_megs << endl ) ;
00110 }
00111 
00121 BESCache::BESCache( const string &cache_dir,
00122                     const string &prefix,
00123                     unsigned long long sizeInMegs )
00124     : _cache_dir( cache_dir ),
00125       _prefix( prefix ),
00126       _cache_size_in_megs( sizeInMegs ),
00127       _lock_fd( -1 )
00128 {
00129     check_ctor_params(); // Throws BESSyntaxUserError on error.
00130 }
00131 
00146 BESCache::BESCache( BESKeys &keys,
00147                     const string &cache_dir_key,
00148                     const string &prefix_key,
00149                     const string &size_key )
00150     : _cache_size_in_megs( 0 ),
00151       _lock_fd( -1 )
00152 {
00153     bool found = false ;
00154     keys.get_value( cache_dir_key, _cache_dir, found ) ;
00155     if( !found )
00156     {
00157         string err = "The cache directory key " + cache_dir_key
00158                      + " was not found in the BES configuration file" ;
00159         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00160     }
00161 
00162     found = false ;
00163     keys.get_value( prefix_key, _prefix, found ) ;
00164     if( !found )
00165     {
00166         string err = "The prefix key " + prefix_key
00167                      + " was not found in the BES configuration file" ;
00168         throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
00169     }
00170 
00171     found = false ;
00172     string cache_size_str ;
00173     keys.get_value( size_key, cache_size_str, found ) ;
00174     if( !found )
00175     {
00176         string err = "The size key " + size_key
00177                      + " was not found in the BES configuration file" ;
00178         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00179     }
00180 
00181     std::istringstream is( cache_size_str ) ;
00182     is >> _cache_size_in_megs ;
00183 
00184     check_ctor_params(); // Throws BESSyntaxUserError on error.
00185 }
00186 
00193 bool
00194 BESCache::lock( unsigned int retry, unsigned int num_tries )
00195 {
00196     // make sure we aren't retrying too many times
00197     if( num_tries > MAX_LOCK_TRIES )
00198         num_tries = MAX_LOCK_TRIES ;
00199     if( retry > MAX_LOCK_RETRY_MS )
00200         retry = MAX_LOCK_RETRY_MS ;
00201 
00202     bool got_lock = true ;
00203     if( _lock_fd == -1 )
00204     {
00205         string lock_file = _cache_dir + "/lock" ;
00206         unsigned int tries = 0 ;
00207         _lock_fd = open( lock_file.c_str(),
00208                          O_CREAT | O_EXCL,
00209                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00210         while( _lock_fd < 0 && got_lock )
00211         {
00212             tries ++ ;
00213             if( tries > num_tries )
00214             {
00215                 _lock_fd = -1 ;
00216                 got_lock = false ;
00217             }
00218             else
00219             {
00220                 usleep( retry ) ;
00221                 _lock_fd = open( lock_file.c_str(),
00222                                  O_CREAT | O_EXCL,
00223                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00224             }
00225         }
00226     }
00227     else
00228     {
00229         // This would be a programming error, or we've gotten into a
00230         // situation where the lock is lost. Lock has been called on the
00231         // same cache object twice in a row without an unlock being called.
00232         string err = "The cache dir " + _cache_dir + " is already locked" ;
00233         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00234     }
00235 
00236     return got_lock ;
00237 }
00238 
00245 bool
00246 BESCache::unlock()
00247 {
00248     // if we call unlock twice in a row, does it matter? I say no, just say
00249     // that it is unlocked.
00250     bool unlocked = true ;
00251     if( _lock_fd != -1 )
00252     {
00253         string lock_file = _cache_dir + "/lock" ;
00254         close( _lock_fd ) ;
00255         (void)unlink( lock_file.c_str() ) ;
00256     }
00257 
00258     _lock_fd = -1 ;
00259 
00260     return unlocked ;
00261 }
00262 
00276 bool
00277 BESCache::is_cached( const string &src, string &target )
00278 {
00279     bool is_it = true ;
00280     string tmp_target = src ;
00281 
00282     // Create the file that would be created in the cache directory
00283     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00284     if( tmp_target.at(0) == '/' )
00285     {
00286         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00287     }
00288     string::size_type slash = 0 ;
00289     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00290     {
00291         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00292     }
00293     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00294     if( last_dot != string::npos )
00295     {
00296         tmp_target = tmp_target.substr( 0, last_dot ) ;
00297     }
00298 
00299     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00300 
00301     // Determine if the target file is already in the cache or not
00302     struct stat buf;
00303     int statret = stat( target.c_str(), &buf ) ;
00304     if( statret != 0 )
00305     {
00306         is_it = false ;
00307     }
00308 
00309     return is_it ;
00310 }
00311 
00312 
00313 
00314 
00323 void
00324 BESCache::purge( )
00325 {
00326     // Fill in contents and get the info
00327     CacheDirInfo cd_info;
00328     collect_cache_dir_info(cd_info);
00329     unsigned long long avg_size = cd_info.get_avg_size();
00330 
00331     // These are references in the refactor, probably would make
00332     // sense to add these calls below to the info, but...
00333     unsigned long long& size = cd_info._total_cache_files_size;
00334     unsigned long long& num_files_in_cache = cd_info._num_files_in_cache;
00335     BESCache::CacheFilesByAgeMap& contents = cd_info._contents;
00336 
00337     BESDEBUG( "bes", "cache size = " << size << endl ) ;
00338     BESDEBUG( "bes", "avg size = " << avg_size << endl ) ;
00339     BESDEBUG( "bes", "num files in cache = "
00340                          << num_files_in_cache << endl ) ;
00341     if( BESISDEBUG( "bes" ) )
00342       {
00343         BESDEBUG( "bes", endl << "BEFORE" << endl ) ;
00344         CacheFilesByAgeMap::iterator ti = contents.begin() ;
00345         CacheFilesByAgeMap::iterator te = contents.end() ;
00346         for( ; ti != te; ti++ )
00347           {
00348             BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
00349           }
00350         BESDEBUG( "bes", endl ) ;
00351       }
00352 
00353 
00354     // if the size of files is greater than max allowed then we need to
00355     // purge the cache directory. Keep going until the size is less than
00356     // the max.
00357     // [Maybe change this to size + (fraction of max_size) > max_size?
00358     // jhrg 5/9/07]
00359     unsigned long long max_size_in_bytes = _cache_size_in_megs * BYTES_PER_MEG ; // Bytes/Meg
00360     while( (size+avg_size) > max_size_in_bytes )
00361       {
00362         // Grab the first which is the oldest
00363         // in terms of access time.
00364         CacheFilesByAgeMap::iterator i = contents.begin() ;
00365 
00366         // if we've deleted all entries, exit the loop
00367         if( i == contents.end() )
00368           {
00369             break;
00370           }
00371 
00372         // Otherwise, remove the file with unlink
00373         BESDEBUG( "bes", "BESCache::purge - removing "
00374             << (*i).second.name << endl ) ;
00375         // unlink rather than remove in case the file is in use
00376         // by a forked BES process
00377         if( unlink( (*i).second.name.c_str() ) != 0 )
00378           {
00379             char *s_err = strerror( errno ) ;
00380             string err = "Unable to remove the file "
00381                 + (*i).second.name
00382                 + " from the cache: " ;
00383             if( s_err )
00384               {
00385                 err.append( s_err ) ;
00386               }
00387             else
00388               {
00389                 err.append( "Unknown error" ) ;
00390               }
00391             throw BESInternalError( err, __FILE__, __LINE__ ) ;
00392           }
00393 
00394         size -= (*i).second.size ;
00395         contents.erase( i ) ;
00396       }
00397 
00398     if( BESISDEBUG( "bes" ) )
00399       {
00400         BESDEBUG( "bes", endl << "AFTER" << endl ) ;
00401         CacheFilesByAgeMap::iterator ti = contents.begin() ;
00402         CacheFilesByAgeMap::iterator te = contents.end() ;
00403         for( ; ti != te; ti++ )
00404           {
00405             BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
00406           }
00407       }
00408 }
00409 
// Local helper that owns a DIR* obtained from opendir() and hands it to
// closedir() when the wrapper is destroyed, so the directory stream is
// released even when an exception leaves the enclosing scope.
//
// Copy construction TRANSFERS ownership: the new wrapper takes the DIR*
// and the source is left empty. A plain member-wise copy would leave two
// wrappers closing the same DIR*, which is what happens with
// `DIR_Wrapper d = DIR_Wrapper(path);` whenever the compiler does not
// elide the copy. Assignment is not supported.
struct DIR_Wrapper
{
  // Open the directory dir_name. If opendir() fails, get() returns NULL.
  explicit DIR_Wrapper(const std::string& dir_name)
    : _dip(opendir(dir_name.c_str()))
  {
  }

  // Take over the directory stream held by other; other ends up empty.
  DIR_Wrapper(const DIR_Wrapper& other)
    : _dip(other._dip)
  {
    other._dip = NULL;
  }

  // Close the directory stream if one is still held.
  ~DIR_Wrapper()
  {
    close();
  }

  // The wrapped stream, or NULL if nothing is held.
  DIR* get() const { return _dip; }

  // Close the stream now. Safe to call more than once.
  void close()
  {
    if (_dip)
      {
        closedir(_dip);
        _dip = NULL;
      }
  }

  // data rep; mutable so the copy constructor can take the stream away
  // from a const (for example temporary) source.
  mutable DIR* _dip;

private:
  // Declared but not implemented: assignment is not supported.
  DIR_Wrapper& operator=(const DIR_Wrapper&);
};
00438 
00439 void
00440 BESCache::collect_cache_dir_info(
00441     BESCache::CacheDirInfo& cd_info // output
00442     ) const
00443 {
00444   // start fresh
00445   cd_info.clear();
00446 
00447   time_t curr_time = time( NULL ) ; // grab the current time so we can
00448                                         // determine the oldest file
00449 
00450   DIR_Wrapper dip = DIR_Wrapper( _cache_dir );
00451   if (! (dip.get()) )
00452     {
00453       string err = "Unable to open cache directory " + _cache_dir ;
00454       throw BESInternalError( err, __FILE__, __LINE__ ) ;
00455     }
00456   else // got a dir entry so count up the cached files
00457     {
00458       struct stat buf;
00459       struct dirent *dit;
00460       // go through the cache directory and collect all of the files that
00461       // start with the matching prefix
00462       while( ( dit = readdir( dip.get() ) ) != NULL )
00463         {
00464           string dirEntry = dit->d_name ;
00465           if( dirEntry.compare( 0, _prefix.length(), _prefix ) == 0)
00466             {
00467               // Now that we have found a match we want to get the size of
00468               // the file and the last access time from the file.
00469               string fullPath = _cache_dir + "/" + dirEntry ;
00470               int statret = stat( fullPath.c_str(), &buf ) ;
00471               if( statret == 0 )
00472                 {
00473                   cd_info._total_cache_files_size += buf.st_size ;
00474 
00475                   // Find out how old the file is
00476                   time_t file_time = buf.st_atime ;
00477 
00478                   // I think we can use the access time without the diff,
00479                   // since it's the relative ages that determine when to
00480                   //         delete a file. Good idea to use the access time so
00481                   // recently used (read) files will linger. jhrg 5/9/07
00482                   double time_diff = difftime( curr_time, file_time ) ;
00483                   cache_entry entry ;
00484                   entry.name = fullPath ;
00485                   entry.size = buf.st_size ;
00486                   cd_info._contents.insert( pair<double, cache_entry>( time_diff, entry ) );
00487                   }
00488               cd_info._num_files_in_cache++ ;
00489             }
00490         }
00491     }
00492 
00493   dip.close();
00494 }
00495 
00503 void
00504 BESCache::dump( ostream &strm ) const
00505 {
00506     strm << BESIndent::LMarg << "BESCache::dump - ("
00507                              << (void *)this << ")" << endl ;
00508     BESIndent::Indent() ;
00509     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00510     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00511     strm << BESIndent::LMarg << "size (mb): " << _cache_size_in_megs << endl ;
00512     BESIndent::UnIndent() ;
00513 }
00514