BESCache.cc

Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2007 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include <unistd.h>
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <dirent.h>
00037 #include <stdio.h>
00038 #include <fcntl.h>
00039 #include <errno.h>
00040 
00041 #include <map>
00042 #include <iostream>
00043 #include <sstream>
00044 
00045 using std::multimap ;
00046 using std::pair ;
00047 using std::greater ;
00048 using std::endl ;
00049 
00050 #include "BESCache.h"
00051 #include "TheBESKeys.h"
00052 #include "BESContainerStorageException.h"
00053 #include "BESDebug.h"
00054 
00055 #define BES_CACHE_CHAR '#'
00056 
/** @brief Name and size of one file found in the cache directory.
 *
 * The size member has the type off_t, the same type as the st_size member
 * of struct stat that it is filled from, so that the size of a file of
 * 2 GiB or more is stored without truncation.
 */
typedef struct _cache_entry
{
    string name ; ///< full path of the cached file
    off_t size ;  ///< size of the file in bytes, as reported by stat()
} cache_entry ;
00062 
00063 void 
00064 BESCache::check_ctor_params()
00065 {
00066     if( _cache_dir.empty() )
00067     {
00068         string err = "The cache dir was not specified, must be non-empty" ;
00069         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00070     }
00071 
00072     struct stat buf;
00073     int statret = stat( _cache_dir.c_str(), &buf ) ;
00074     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00075     {
00076         string err = "The cache dir " + _cache_dir + " does not exist" ;
00077         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00078     }
00079 
00080     if( _prefix.empty() )
00081     {
00082         string err = "The prefix was not specified, must be non-empty" ;
00083         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00084     }
00085 
00086     if( _cache_size == 0 )
00087     {
00088         string err = "The cache size was not specified, must be non-zero" ;
00089         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00090     }
00091 }
00092 
00102 BESCache::BESCache( const string &cache_dir,
00103                     const string &prefix,
00104                     unsigned int size )
00105     : _cache_dir( cache_dir ),
00106       _prefix( prefix ),
00107       _cache_size( size ),
00108       _lock_fd( -1 )
00109 {
00110     check_ctor_params(); // Throws BESContainerStorageException on error.
00111 }
00112 
00127 BESCache::BESCache( BESKeys &keys,
00128                     const string &cache_dir_key,
00129                     const string &prefix_key,
00130                     const string &size_key )
00131     : _cache_size( 0 ),
00132       _lock_fd( -1 )
00133 {
00134     bool found = false ;
00135     _cache_dir = keys.get_key( cache_dir_key, found ) ;
00136     if( !found )
00137     {
00138         string err = "The cache dir key " + cache_dir_key
00139                      + " was not found" ;
00140         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00141     }
00142 
00143     found = false ;
00144     _prefix = keys.get_key( prefix_key, found ) ;
00145     if( !found )
00146     {
00147         string err = "The prefix key " + prefix_key
00148                      + " was not found" ;
00149         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00150     }
00151 
00152     found = false ;
00153     string _cache_size_str = keys.get_key( size_key, found ) ;
00154     if( !found )
00155     {
00156         string err = "The size key " + size_key
00157                      + " was not found" ;
00158         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00159     }
00160 
00161 
00162     std::istringstream is( _cache_size_str ) ;
00163     is >> _cache_size ;
00164 
00165     check_ctor_params(); // Throws BESContainerStorageException on error.
00166 }
00167 
00174 bool
00175 BESCache::lock( unsigned int retry, unsigned int num_tries )
00176 {
00177     bool got_lock = true ;
00178     if( _lock_fd == -1 )
00179     {
00180         string lock_file = _cache_dir + "/lock" ;
00181         unsigned int tries = 0 ;
00182         _lock_fd = open( lock_file.c_str(),
00183                          O_CREAT | O_EXCL,
00184                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00185         while( _lock_fd < 0 && got_lock )
00186         {
00187             tries ++ ;
00188             if( tries > num_tries )
00189             {
00190                 _lock_fd = -1 ;
00191                 got_lock = false ;
00192                 /*
00193                 string err = "Unable to lock the cache directory "
00194                              + _cache_dir + ", timed out" ;
00195                 throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00196                 */
00197             }
00198             else
00199             {
00200                 usleep( retry ) ;
00201                 _lock_fd = open( lock_file.c_str(),
00202                                  O_CREAT | O_EXCL,
00203                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00204             }
00205         }
00206     }
00207     else
00208     {
00209         // This would be a programming error, or we've gotten into a
00210         // situation where the lock is lost. Lock has been called on the
00211         // same cache object twice in a row without an unlock being called.
00212         string err = "The cache dir " + _cache_dir + " is already locked" ;
00213         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00214     }
00215 
00216     return got_lock ;
00217 }
00218 
00225 bool
00226 BESCache::unlock()
00227 {
00228     // if we call unlock twice in a row, does it matter? I say no, just say
00229     // that it is unlocked.
00230     bool unlocked = true ;
00231     if( _lock_fd != -1 )
00232     {
00233         string lock_file = _cache_dir + "/lock" ;
00234         close( _lock_fd ) ;
00235         unlink( lock_file.c_str() ) ;
00236     }
00237 
00238     _lock_fd = -1 ;
00239 
00240     return unlocked ;
00241 }
00242 
00256 bool
00257 BESCache::is_cached( const string &src, string &target )
00258 {
00259     bool is_it = true ;
00260     string tmp_target = src ;
00261 
00262     // Create the file that would be created in the cache directory
00263     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00264     if( tmp_target.at(0) == '/' )
00265     {
00266         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00267     }
00268     string::size_type slash = 0 ;
00269     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00270     {
00271         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00272     }
00273     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00274     if( last_dot != string::npos )
00275     {
00276         tmp_target = tmp_target.substr( 0, last_dot ) ;
00277     }
00278 
00279     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00280 
00281     // Determine if the target file is already in the cache or not
00282     struct stat buf;
00283     int statret = stat( target.c_str(), &buf ) ;
00284     if( statret != 0 )
00285     {
00286         is_it = false ;
00287     }
00288 
00289     return is_it ;
00290 }
00291 
00300 void
00301 BESCache::purge( )
00302 {
00303     int max_size = _cache_size * 1048576 ; // Bytes/Meg
00304     struct stat buf;
00305     int size = 0 ; // total size of all cached files
00306     time_t curr_time = time( NULL ) ; // grab the current time so we can
00307                                       // determine the oldest file
00308     // map of time,entry values
00309     multimap<double,cache_entry,greater<double> > contents ;
00310 
00311     // the prefix is actually the specified prefix plus the cache char '#'
00312     string match_prefix = _prefix + BES_CACHE_CHAR ;
00313 
00314     // go through the cache directory and collect all of the files that
00315     // start with the matching prefix
00316     DIR *dip = opendir( _cache_dir.c_str() ) ;
00317     if( dip != NULL )
00318     {
00319         struct dirent *dit;
00320         while( ( dit = readdir( dip ) ) != NULL )
00321         {
00322             string dirEntry = dit->d_name ;
00323             if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
00324             {
00325                 // Now that we have found a match we want to get the size of
00326                 // the file and the last access time from the file.
00327                 string fullPath = _cache_dir + "/" + dirEntry ;
00328                 int statret = stat( fullPath.c_str(), &buf ) ;
00329                 if( statret == 0 )
00330                 {
00331                     size += buf.st_size ;
00332 
00333                     // Find out how old the file is
00334                     time_t file_time = buf.st_atime ;
00335                     // I think we can use the access time without the diff,
00336                     // since it's the relative ages that determine when to
00337                     // delete a file. Good idea to use the access time so
00338                     // recently used (read) files will linger. jhrg 5/9/07
00339                     double time_diff = difftime( curr_time, file_time ) ;
00340                     cache_entry entry ;
00341                     entry.name = fullPath ;
00342                     entry.size = buf.st_size ;
00343                     contents.insert( pair<double,cache_entry>( time_diff, entry ) );
00344                 }
00345             }
00346         }
00347 
00348         // We're done looking in the directory, close it
00349         closedir( dip ) ;
00350 
00351 #if 0
00352         cout << endl << "BEFORE" << endl ;
00353         multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00354         multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00355         for( ; ti != te; ti++ )
00356         {
00357             cout << (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ;
00358         }
00359         cout << endl ;
00360 #endif
00361 
00362         // if the size of files is greater than max allowed then we need to
00363         // purge the cache directory. Keep going until the size is less than
00364         // the max.
00365         multimap<double,cache_entry,greater<double> >::iterator i ;
00366         if( size > max_size )
00367         {
00368             // Maybe change this to size + (fraction of max_size) > max_size?
00369             // jhrg 5/9/07
00370             while( size > max_size )
00371             {
00372                 i = contents.begin() ;
00373                 BESDEBUG( "BESCache::purge - removing " << (*i).second.name << endl )
00374                 if( remove( (*i).second.name.c_str() ) != 0 )
00375                 {
00376                     char *s_err = strerror( errno ) ;
00377                     string err = "Unable to remove the file "
00378                                  + (*i).second.name + " from the cache: " ;
00379                     if( s_err )
00380                     {
00381                         err.append( s_err ) ;
00382                     }
00383                     else
00384                     {
00385                         err.append( "Unknown error" ) ;
00386                     }
00387                     throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00388                 }
00389                 size -= (*i).second.size ;
00390                 contents.erase( i ) ;
00391             }
00392         }
00393 
00394 #if 0
00395         cout << endl << "AFTER" << endl ;
00396         ti = contents.begin() ;
00397         te = contents.end() ;
00398         for( ; ti != te; ti++ )
00399         {
00400             cout << (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ;
00401         }
00402 #endif
00403     }
00404     else
00405     {
00406         string err = "Unable to open cache directory " + _cache_dir ;
00407         throw BESContainerStorageException( err, __FILE__, __LINE__ ) ;
00408     }
00409 }
00410 
00418 void
00419 BESCache::dump( ostream &strm ) const
00420 {
00421     strm << BESIndent::LMarg << "BESCache::dump - ("
00422                              << (void *)this << ")" << endl ;
00423     BESIndent::Indent() ;
00424     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00425     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00426     strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
00427     BESIndent::UnIndent() ;
00428 }
00429 

Generated on Wed Aug 29 03:14:15 2007 for OPeNDAP Back End Server (BES) by  doxygen 1.5.2