bes  Updated for version 3.20.6
DirectoryUtil.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 #include "DirectoryUtil.h"
32 
33 #include <cstring>
34 #include <cerrno>
35 #include <sstream>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 
40 // libdap
41 #include "GNURegex.h"
42 
43 // bes
44 #include "BESDebug.h"
45 #include "BESForbiddenError.h"
46 #include "BESInternalError.h"
47 #include "TheBESKeys.h"
48 #include "BESNotFoundError.h"
49 #include "BESUtil.h"
50 
51 using std::string;
52 using std::vector;
53 using std::endl;
54 
55 namespace agg_util {
/**
 * Scoped owner of a DIR* obtained from opendir().
 *
 * The constructor calls opendir() on the given path; the destructor calls
 * closedir() if the open succeeded. Callers must test fail() after
 * construction; on failure errno still holds the reason set by opendir().
 *
 * Copying is disabled: two wrappers sharing one DIR* would each call
 * closedir() on it.
 */
struct DirWrapper {
public:

    /** Try to open @p fullDirPath; check fail() afterwards. */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // if the user sees null after this, they can check the errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the directory stream if one was opened. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true when opendir() did not produce a stream. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the wrapped stream (null when fail() is true); ownership stays here. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // automatically closedir() if non-null on dtor.
    DIR* _pDir;
    // The path handed to the constructor.
    std::string _fullPath;

private:
    // Declared but never defined: forbids copies (pre-C++11 idiom).
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
93 
95 FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
96  _path(path), _basename(basename), _fullPath("") // start empty, cached later
97  , _isDir(isDir), _modTime(modTime)
98 {
101 }
102 
103 FileInfo::~FileInfo()
104 {
105 }
106 
107 const std::string&
109 {
110  return _path;
111 }
112 
113 const std::string&
114 FileInfo::basename() const
115 {
116  return _basename;
117 }
118 
119 bool FileInfo::isDir() const
120 {
121  return _isDir;
122 }
123 
124 time_t FileInfo::modTime() const
125 {
126  return _modTime;
127 }
128 
129 std::string FileInfo::getModTimeAsString() const
130 {
131  // we'll just use UTC for the output...
132  struct tm* pTM = gmtime(&_modTime);
133  char buf[128];
134  // this should be "Year-Month-Day Hour:Minute:Second"
135  strftime(buf, 128, "%F %T", pTM);
136  return string(buf);
137 }
138 
139 const std::string&
141 {
142  if (_fullPath.empty()) {
143  _fullPath = _path + "/" + _basename;
144  }
145  return _fullPath;
146 }
147 
148 std::string FileInfo::toString() const
149 {
150  return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
151  + getModTimeAsString() + "\""
152  " }";
153 }
154 
156 
157 const string DirectoryUtil::_sDebugChannel = "agg_util";
158 
159 DirectoryUtil::DirectoryUtil() :
160  _rootDir("/"), _suffix("") // we start with no filter
161  , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
162 {
163  // this can throw, but the class is completely constructed by this point.
164  setRootDir("/");
165 }
166 
167 DirectoryUtil::~DirectoryUtil()
168 {
169  clearRegExp();
170 }
171 
173 const std::string&
175 {
176  return _rootDir;
177 }
178 
184 void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
185  bool /*allowSymLinks=false*/)
186 {
187  if (!allowRelativePaths && hasRelativePath(origRootDir)) {
188  throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
189  __LINE__);
190  }
191 
192  // Get the root without trailing slash, we'll add it.
193  _rootDir = origRootDir;
194  removeTrailingSlashes(_rootDir);
195  // If empty here, that means the actual filesystem root.
196 
197  // Use the BESUtil to test the path
198  // Since it assumes root is valid and strips preceding "/",
199  // we use "/" as the root path and the root path as the path
200  // to validate the root. This will throw if invalid.
201  BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
202 
203  // We should be good if we get here.
204 }
205 
206 void DirectoryUtil::setFilterSuffix(const std::string& suffix)
207 {
208  _suffix = suffix;
209 }
210 
211 void DirectoryUtil::setFilterRegExp(const std::string& regexp)
212 {
213  clearRegExp(); // avoid leaks
214  if (!regexp.empty()) {
215  _pRegExp = new libdap::Regex(regexp.c_str());
216  }
217 }
218 
220 {
221  delete _pRegExp;
222  _pRegExp = 0;
223 }
224 
226 {
227  _newestModTime = newestModTime;
228  _filteringModTimes = true;
229 }
230 
231 void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
232  std::vector<FileInfo>* pDirectories)
233 {
234  string pathToUse(path);
235  removePrecedingSlashes(pathToUse);
236  pathToUse = getRootDir() + "/" + pathToUse;
237  BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
238 
239  // RAII, will closedir no matter how we leave function, including a throw
240  DirWrapper pDir(pathToUse);
241  if (pDir.fail()) {
242  throwErrorForOpendirFail(pathToUse);
243  }
244 
245  // Go through each entry and see if it's a directory or regular file and
246  // add it to the list.
247  struct dirent* pDirEnt = 0;
248  while ((pDirEnt = readdir(pDir.get())) != 0) {
249  string entryName = pDirEnt->d_name;
250  // Exclude ".", ".." and any dotfile dirs like ".svn".
251  if (!entryName.empty() && entryName[0] == '.') {
252  continue;
253  }
254 
255  // Figure out if it's a regular file or directory
256  string pathToEntry = pathToUse + "/" + entryName;
257  struct stat statBuf;
258  int statResult = stat(pathToEntry.c_str(), &statBuf);
259  if (statResult != 0) {
260  // If we can't stat the file for some reason, then ignore it
261  continue;
262  }
263 
264  // Use the passed in path for the entry since we
265  // want to make the locations be relative to the root
266  // for loading later.
267  if (pDirectories && S_ISDIR(statBuf.st_mode)) {
268  pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
269  }
270  else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
271  FileInfo theFile(path, entryName, false, statBuf.st_mtime);
272  // match against the relative passed in path, not root full path
273  if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
274  pRegularFiles->push_back(theFile);
275  }
276  }
277  }
278 }
279 
280 void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
281  std::vector<FileInfo>* pDirectories)
282 {
283  // Remove trailing slash to make it canonical
284  string canonicalPath = path;
285  removeTrailingSlashes(canonicalPath);
286 
287  // We use our own local vector of directories in order to recurse,
288  // then add them to the end of pDirectories if it exists.
289 
290  // First, get the current path's listing
291  vector<FileInfo> dirs;
292  dirs.reserve(16); // might as well start with a "few" to avoid grows.
293 
294  // Keep adding them to the user specified regular file list if desired,
295  // but keep track of dirs ourself.
296  getListingForPath(canonicalPath, pRegularFiles, &dirs);
297 
298  // If the caller wanted directories, append them all to the return
299  if (pDirectories) {
300  pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
301  }
302 
303  // Finally, recurse on each directory in dirs
304  for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
305  string subPath = canonicalPath + "/" + it->basename();
306  BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
307  // Pass down the caller's accumulated vector's to be filled in.
308  getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
309  }
310 
311 }
312 
313 void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
314 {
315  // call the other one, not accumulated the directories, only recursing into them.
316  getListingForPathRecursive(path, &rRegularFiles, 0);
317 }
318 
/**
 * Translate the errno left by a failed opendir() into a BES exception.
 * Always throws; never returns normally.
 *
 * @param fullPath  the path that was passed to opendir(), used in the message
 * @throws BESForbiddenError  for EACCES
 * @throws BESNotFoundError   for ELOOP, ENAMETOOLONG, ENOENT, ENOTDIR
 * @throws BESInternalError   for EMFILE, ENFILE and any other errno
 */
void DirectoryUtil::throwErrorForOpendirFail(const std::string& fullPath)
{
    // Snapshot errno up front: it is reused below for the fallback message,
    // and library calls made while building strings are free to change it.
    const int savedErrno = errno;
    const std::string quotedPath = "\"" + fullPath + "\"";

    switch (savedErrno) {
    case EACCES:
        throw BESForbiddenError("Permission denied for some directory in path=" + quotedPath, __FILE__, __LINE__);

    case ELOOP:
        // closest I can figure...
        throw BESNotFoundError("A symlink loop was detected in path=" + quotedPath, __FILE__, __LINE__);

    case ENAMETOOLONG:
        throw BESNotFoundError("A name in the path was too long. path=" + quotedPath, __FILE__, __LINE__);

    case ENOENT:
        throw BESNotFoundError("Some part of the path was not found. path=" + quotedPath, __FILE__, __LINE__);

    case ENOTDIR:
        throw BESNotFoundError("Some part of the path was not a directory. path=" + quotedPath, __FILE__, __LINE__);

    case EMFILE: // descriptor limit of this process
    case ENFILE: // descriptor limit of the whole system
        throw BESInternalError("Internal Error: Too many files are currently open!", __FILE__, __LINE__);

    default: {
        // Include the errno value and its text so the failure can be diagnosed.
        std::ostringstream oss;
        oss << "An unknown errno was found after opendir() was called on path=" << quotedPath
            << " (errno=" << savedErrno << ": " << strerror(savedErrno) << ")";
        throw BESInternalError(oss.str(), __FILE__, __LINE__);
    }
    }
}
364 
365 bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
366 {
367  bool matches = true;
368  // Do the suffix first since it's fast
369  if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
370  matches = false;
371  }
372 
373  // Suffix matches and we have a regexp, check that
374  if (matches && _pRegExp) {
375  // match the full string, -1 on fail, num chars matching otherwise
376  int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
377  matches = (numCharsMatching > 0); // TODO do we want to match the size()?
378  }
379 
380  if (matches && _filteringModTimes) {
381  matches = (modTime < _newestModTime);
382  }
383 
384  return matches;
385 }
386 
387 bool DirectoryUtil::hasRelativePath(const std::string& path)
388 {
389  return (path.find("..") != string::npos);
390 }
391 
393 {
394  if (!path.empty()) {
395  string::size_type pos = path.find_last_not_of("/");
396  if (pos != string::npos) {
397  path = path.substr(0, pos + 1);
398  }
399  }
400 }
401 
403 {
404  if (!path.empty()) {
405  string::size_type pos = path.find_first_not_of("/");
406  path = path.substr(pos, string::npos);
407  }
408 }
409 
410 void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
411 {
412  std::ostringstream oss;
413  printFileInfoList(oss, listing);
414  BESDEBUG(_sDebugChannel, oss.str() << endl);
415 }
416 
417 void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
418 {
419  for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
420  os << it->toString() << endl;
421  }
422 }
423 
425 {
426  bool found;
427  string rootDir;
428  TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
429  if (!found) {
430  TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
431  }
432  if (!found) {
433  rootDir = "/";
434  }
435  return rootDir;
436 }
437 
438 bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
439 {
440  // see if the last suffix.size() characters match.
441  bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
442  return matches;
443 }
444 }
agg_util::DirectoryUtil::getListingOfRegularFilesRecursive
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
Definition: DirectoryUtil.cc:313
agg_util::FileInfo
Definition: DirectoryUtil.h:46
agg_util::DirectoryUtil::removePrecedingSlashes
static void removePrecedingSlashes(std::string &path)
Definition: DirectoryUtil.cc:402
agg_util::DirectoryUtil::setRootDir
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
Definition: DirectoryUtil.cc:184
BESNotFoundError
error thrown if the resource requested cannot be found
Definition: BESNotFoundError.h:40
agg_util::DirectoryUtil::setFilterRegExp
void setFilterRegExp(const std::string &regexp)
Definition: DirectoryUtil.cc:211
BESUtil::check_path
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:254
agg_util
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...
Definition: AggMemberDataset.cc:38
agg_util::DirectoryUtil::removeTrailingSlashes
static void removeTrailingSlashes(std::string &path)
Definition: DirectoryUtil.cc:392
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
agg_util::DirectoryUtil::setFilterSuffix
void setFilterSuffix(const std::string &suffix)
Definition: DirectoryUtil.cc:206
agg_util::FileInfo::FileInfo
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
Definition: DirectoryUtil.cc:95
BESForbiddenError
error thrown if the BES is not allowed to access the resource requested
Definition: BESForbiddenError.h:40
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
agg_util::DirectoryUtil::getRootDir
const std::string & getRootDir() const
Definition: DirectoryUtil.cc:174
agg_util::FileInfo::path
const std::string & path() const
Definition: DirectoryUtil.cc:108
agg_util::FileInfo::getModTimeAsString
std::string getModTimeAsString() const
Definition: DirectoryUtil.cc:129
agg_util::FileInfo::getFullPath
const std::string & getFullPath() const
Definition: DirectoryUtil.cc:140
agg_util::DirectoryUtil::printFileInfoList
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
Definition: DirectoryUtil.cc:417
agg_util::DirectoryUtil::setFilterModTimeOlderThan
void setFilterModTimeOlderThan(time_t newestModTime)
Definition: DirectoryUtil.cc:225
agg_util::DirectoryUtil::getBESRootDir
static std::string getBESRootDir()
Definition: DirectoryUtil.cc:424
agg_util::DirectoryUtil::getListingForPath
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Definition: DirectoryUtil.cc:231
agg_util::DirectoryUtil::getListingForPathRecursive
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
Definition: DirectoryUtil.cc:280
agg_util::DirectoryUtil::clearRegExp
void clearRegExp()
Definition: DirectoryUtil.cc:219
agg_util::DirectoryUtil::hasRelativePath
static bool hasRelativePath(const std::string &path)
Definition: DirectoryUtil.cc:387