31 #include "ScanElement.h"
40 #include <sys/types.h>
43 #include "AggregationElement.h"
44 #include "DirectoryUtil.h"
45 #include "NCMLDebug.h"
46 #include "NCMLParser.h"
47 #include "NetcdfElement.h"
49 #include "SimpleTimeParser.h"
50 #include "XMLHelpers.h"
55 #include <unicode/smpdtfmt.h>
56 #include <unicode/timezone.h>
// Element type name for this class: the literal string "scan".
// It is used by toString() to build the opening tag text.
62 const string ScanElement::_sTypeName =
"scan";
// Attribute names accepted on this element. The list is produced by
// getValidAttributes() during static initialization and is the list passed
// to validateAttributes() when attributes are processed.
63 const vector<string> ScanElement::_sValidAttrs = getValidAttributes();
// Private helper struct bundling the two ICU SimpleDateFormat objects used for
// dateFormatMark handling together with the bookkeeping values _markPos and
// _sdfLen that initSimpleDateFormats() fills in.
// NOTE: parts of this definition (constructor/destructor heads, the _markPos
// and _sdfLen declarations) are not visible in this excerpt.
66 struct ScanElement::DateFormatters {
// Member initializers: both formatter pointers start null, both counters at 0.
68 _pDateFormat(0), _pISO8601(0), _markPos(0), _sdfLen(0)
// Release both formatter objects. SAFE_DELETE is a project macro; presumably
// it deletes the pointee and nulls the pointer -- TODO confirm.
73 SAFE_DELETE(_pDateFormat);
74 SAFE_DELETE(_pISO8601);
// Formatter built from the user-supplied dateFormatMark pattern; used to parse
// the date portion of a filename in extractTimeFromFilename().
80 icu::SimpleDateFormat* _pDateFormat;
// Formatter given the ISO 8601 pattern; used to format the parsed date back
// into a string in extractTimeFromFilename().
83 icu::SimpleDateFormat* _pISO8601;
// Default constructor. Every attribute string (_location, _suffix, _regExp,
// _subdirs, _olderThan, _dateFormatMark, _enhance, _ncoords) starts empty, and
// the _pParent and _pDateFormatters pointers start null. The NCMLElement base
// is constructed with 0.
95 ScanElement::ScanElement() :
96 RCObjectInterface(), NCMLElement(0), _location(
""), _suffix(
""), _regExp(
""), _subdirs(
""), _olderThan(
""), _dateFormatMark(
97 ""), _enhance(
""), _ncoords(
""), _pParent(0), _pDateFormatters(0)
// Copy constructor. Copies all attribute strings and the _pParent pointer from
// proto. The date formatters are NOT copied: _pDateFormatters starts null and
// is rebuilt from the copied _dateFormatMark when that attribute is non-empty.
// NOTE(review): the NCMLElement base is constructed with 0 rather than from
// proto, so whatever state the base holds is not copied here -- confirm this
// is intended.
101 ScanElement::ScanElement(
const ScanElement& proto) :
102 RCObjectInterface(), NCMLElement(0), _location(proto._location), _suffix(proto._suffix), _regExp(proto._regExp), _subdirs(
103 proto._subdirs), _olderThan(proto._olderThan), _dateFormatMark(proto._dateFormatMark), _enhance(proto._enhance), _ncoords(
104 proto._ncoords), _pParent(proto._pParent)
105 , _pDateFormatters(0)
// Re-create this copy's own formatter objects instead of sharing proto's.
107 if (!_dateFormatMark.empty()) {
108 initSimpleDateFormats(_dateFormatMark);
// Destructor. The body is not visible in this excerpt.
// NOTE(review): presumably releases _pDateFormatters (e.g. through
// deleteDateFormats()) -- confirm against the full file.
112 ScanElement::~ScanElement()
// Const accessor for this scan element's parent. The return type and body are
// not visible in this excerpt; getDatasetList() treats the result as a pointer
// and calls setAggregationVariableCoordinateAxisType() on it.
// Presumably returns _pParent -- TODO confirm.
119 ScanElement::getParent()
const
// Const accessor for the element type name. The return type and body are not
// visible in this excerpt; presumably returns _sTypeName ("scan") -- TODO confirm.
130 ScanElement::getTypeName()
const
// Const clone operation. The return type and body are not visible in this
// excerpt; presumably returns a new ScanElement made with the copy
// constructor -- TODO confirm.
136 ScanElement::clone()
const
// Fragment of the attribute-handling method (its signature and the lines that
// copy attribute values into members are not visible in this excerpt;
// presumably this is setAttributes -- TODO confirm).
// Check the supplied attributes against the list of names this element accepts.
153 validateAttributes(attrs, _sValidAttrs);
// Reject attributes that are accepted syntactically but not implemented
// (currently "enhance", see throwOnUnhandledAttributes()).
156 throwOnUnhandledAttributes();
// Build the ICU date formatters only when a dateFormatMark was supplied.
159 if (!_dateFormatMark.empty()) {
160 initSimpleDateFormats(_dateFormatMark);
// Begin-of-element handler. Verifies placement: if the parser's current scope
// is not an aggregation, a parse error is reported through
// THROW_NCML_PARSE_ERROR with this element's line number and its toString()
// text. Any further statements of the body are not visible in this excerpt.
164 void ScanElement::handleBegin()
166 if (!_parser->isScopeAggregation()) {
167 THROW_NCML_PARSE_ERROR(line(),
"ScanElement: " + toString() +
" "
168 "was not the direct child of an <aggregation> element as required!");
// Character-content handler. Forwards the content unchanged to the base-class
// implementation NCMLElement::handleContent(); no scan-specific handling is
// visible here.
//
// content: the character data found inside the element.
172 void ScanElement::handleContent(
const string& content)
175 NCMLElement::handleContent(content);
// End-of-element handler. Fetches the parser's current dataset, checks that
// pointer with VALID_PTR, and asserts that the parent aggregation pointer
// (pParentAgg, obtained on a line not visible in this excerpt) is non-null.
// What is done with the parent aggregation afterwards is also not visible here.
178 void ScanElement::handleEnd()
181 NetcdfElement* pCurrentDataset = _parser->getCurrentDataset();
182 VALID_PTR(pCurrentDataset);
// The scan must live inside an aggregation owned by the current dataset.
184 NCML_ASSERT_MSG(pParentAgg,
"ScanElement::handleEnd(): Couldn't"
185 " find the the child aggregation of the current dataset, which is "
186 "supposed to be our parent!");
// Builds a printable, tag-like description of this element: "<scan " followed
// by the location attribute (always printed) and then suffix, regExp, subdirs,
// olderThan, dateFormatMark and ncoords, each passed through
// printAttributeIfNotEmpty(). Judging by the helper's name, empty attributes
// are omitted -- the helper itself is not visible here. The tail of the
// expression (the closing of the tag) is not visible in this excerpt either.
190 string ScanElement::toString()
const
192 return "<" + _sTypeName +
" " +
"location=\"" + _location +
"\" "
194 printAttributeIfNotEmpty(
"suffix", _suffix) + printAttributeIfNotEmpty(
"regExp", _regExp)
195 + printAttributeIfNotEmpty(
"subdirs", _subdirs) + printAttributeIfNotEmpty(
"olderThan", _olderThan)
196 + printAttributeIfNotEmpty(
"dateFormatMark", _dateFormatMark) + printAttributeIfNotEmpty(
"ncoords", _ncoords)
// Const accessor for the ncoords attribute. The return type and body are not
// visible in this excerpt; presumably returns _ncoords -- TODO confirm.
201 ScanElement::ncoords()
const
// Returns true only when the subdirs attribute is exactly the string "true".
// The comparison is a plain string equality, so any other spelling (for
// example "True" or "1") and the empty string all yield false.
206 bool ScanElement::shouldScanSubdirs()
const
208 return (_subdirs ==
"true");
// Converts the olderThan attribute into a number of seconds.
// The branch taken when the attribute is empty, and the parsing call itself,
// are not visible in this excerpt. When parsing fails, a parse error is
// reported through THROW_NCML_PARSE_ERROR describing the expected
// "<number> <time unit>" form.
211 long ScanElement::getOlderThanAsSeconds()
const
213 if (_olderThan.empty()) {
220 THROW_NCML_PARSE_ERROR(line(),
"Couldn't parse the olderThan attribute! Expect a string of the form: "
221 "\"%d %units\" where %d is a number and %units is a time unit string such as "
222 " \"hours\" or \"s\".");
// Performs the scan and appends the resulting datasets to `datasets`.
//
// Visible steps: configure the scanner's filters, collect matching files
// (recursing into subdirectories when shouldScanSubdirs() is true), create one
// "netcdf" element per matching file, sort the new elements, and append them
// to the end of `datasets`.
//
// datasets: output list; existing entries are kept and the scanned datasets
//           are inserted after them.
//
// A parse error is reported through THROW_NCML_PARSE_ERROR when the scan hits
// a BESNotFoundError (per the message text; the catch clause itself is not
// visible in this excerpt).
// NOTE: many lines of this function (scanner construction, the scan calls,
// attribute setup for each dataset, the pushes into scannedDatasets) are not
// visible here.
229 void ScanElement::getDatasetList(vector<NetcdfElement*>& datasets)
const
235 BESDEBUG(
"ncml",
"Scan will be relative to the BES root data path = " << scanner.
getRootDir() << endl);
// Install this element's filters on the scanner before scanning.
237 setupFilters(scanner);
// Receives the files found by the scan.
239 vector<FileInfo> files;
// Choose between a recursive and a single-directory scan.
244 if (shouldScanSubdirs()) {
// Error path: build a message that includes this element's description.
253 oss <<
"In processing " << toString() <<
" we got a BESNotFoundError with msg=";
255 oss <<
" Perhaps a path is incorrect?" << endl;
256 THROW_NCML_PARSE_ERROR(line(), oss.str());
// Debug output describing what the scan matched.
263 BESDEBUG(
"ncml",
"Scan " << toString() <<
" returned matching regular files: " << endl);
265 BESDEBUG(
"ncml",
"WARNING: No matching files found!" << endl);
268 DirectoryUtil::printFileInfoList(files);
273 if (!_ncoords.empty()) {
275 "Scan has ncoords attribute specified: ncoords=" << _ncoords <<
" Will be inherited by all matching datasets!" << endl);
// Scanned datasets are gathered here first so they can be sorted before being
// appended to the caller's list.
284 vector<NetcdfElement*> scannedDatasets;
285 scannedDatasets.reserve(files.size());
// One dataset element is created per matching file.
287 for (vector<FileInfo>::const_iterator it = files.begin(); it != files.end(); ++it) {
296 if (!_ncoords.empty()) {
// With a dateFormatMark, a time string is extracted from the file's basename.
303 if (!_dateFormatMark.empty()) {
304 string timeCoord = extractTimeFromFilename(it->basename());
305 BESDEBUG(
"ncml",
"Got an ISO 8601 time from dateFormatMark: " << timeCoord << endl);
// Create the dataset element through the parser's element factory.
310 RCPtr<NCMLElement> dataset = _parser->_elementFactory.makeElement(
"netcdf", attrs, *_parser);
311 VALID_PTR(dataset.get());
// Sort order: by location when there is no dateFormatMark; per the debug
// message below, by coordValue (the ISO 8601 date strings) otherwise.
319 if (_dateFormatMark.empty())
321 BESDEBUG(
"ncml",
"Sorting scanned datasets by location()..." << endl);
322 std::sort(scannedDatasets.begin(), scannedDatasets.end(), NetcdfElement::isLocationLexicographicallyLessThan);
327 "Sorting scanned datasets by coordValue() since we got a dateFormatMark" " and the coordValue are ISO 8601 dates..." << endl);
328 std::sort(scannedDatasets.begin(), scannedDatasets.end(), NetcdfElement::isCoordValueLexicographicallyLessThan);
// A dateFormatMark implies a time axis: tell the parent aggregation so.
333 if (!_dateFormatMark.empty()) {
334 VALID_PTR(getParent());
335 getParent()->setAggregationVariableCoordinateAxisType(
"Time");
343 BESDEBUG(
"ncml",
"Adding the sorted scanned datasets to the current aggregation list..." << endl);
// Append after the existing entries, reserving the final size up front.
344 datasets.reserve(datasets.size() + scannedDatasets.size());
345 datasets.insert(datasets.end(), scannedDatasets.begin(), scannedDatasets.end());
// Fragment of the filter-setup method (its signature is not visible in this
// excerpt; presumably this is setupFilters(), which getDatasetList() calls
// with the scanner -- TODO confirm). Each non-empty attribute enables one
// filter; the calls that install the filters on the scanner are not visible.
// Suffix filter.
351 if (!_suffix.empty()) {
352 BESDEBUG(
"ncml",
"Scan will filter against suffix=\"" << _suffix <<
"\"" << endl);
// Regular-expression filter.
356 if (!_regExp.empty()) {
357 BESDEBUG(
"ncml",
"Scan will filter against the regExp=\"" << _regExp <<
"\"" << endl);
// A libdap::Error raised while setting up the regular expression is converted
// into a parse error that carries the offending expression and the message.
364 catch (libdap::Error& err) {
365 THROW_NCML_PARSE_ERROR(line(),
366 "There was a problem compiling the regExp=\"" + _regExp +
"\" : " + err.get_error_message());
// Modification-time filter: cutoff = current time minus the olderThan duration.
370 if (!_olderThan.empty()) {
371 long secs = getOlderThanAsSeconds();
372 struct timeval tvNow;
373 gettimeofday(&tvNow, 0);
374 long cutoffTime = tvNow.tv_sec - secs;
377 "Setting scan filter modification time using duration: " << secs <<
" from the olderThan attribute=\"" << _olderThan <<
"\"" " The cutoff modification time based on now is: " << getTimeAsString(cutoffTime) << endl);
// SimpleDateFormat pattern applied to the _pISO8601 formatter in
// initSimpleDateFormats(). The quoted 'T' and 'Z' are literal characters in
// the pattern; that formatter's time zone is set to GMT where it is created.
382 static const string ISO_8601_FORMAT =
"yyyy-MM-dd'T'HH:mm:ss'Z'";
// File-local helper: converts an ICU UnicodeString into a std::string.
//
// toString:      output; assigned from the converted, NUL-terminated buffer.
// fromUniString: the ICU string to convert.
// Returns a bool success flag. The return statements are not visible in this
// excerpt; callers assert on the result being true.
388 static bool convertUnicodeStringToStdString(std::string& toString,
const icu::UnicodeString& fromUniString)
// One char per UTF-16 code unit plus room for the terminator.
// NOTE(review): characters that convert to more than one byte would not fit;
// the length check below then catches that case instead of overflowing.
396 buffer.resize(fromUniString.length() + 1);
397 UErrorCode errorCode = U_ZERO_ERROR;
// The third argument (0) is the converter pointer; presumably a null converter
// selects ICU's default converter -- TODO confirm against the ICU docs.
398 int32_t patternLen = fromUniString.extract(&buffer[0], buffer.size(), 0, errorCode);
// Failure when the output did not fit in the buffer or ICU reported an error.
399 if (patternLen >=
static_cast<int32_t
>(buffer.size()) || U_FAILURE(errorCode)) {
403 toString = std::string(&buffer[0]);
// Creates the DateFormatters helper from a dateFormatMark attribute value.
//
// The text after the last '#' in dateFormatMark is used as an ICU
// SimpleDateFormat pattern. Two formatters are created, both set to GMT:
// _pDateFormat (the user pattern) and _pISO8601 (ISO_8601_FORMAT).
// The position of the '#' and the length of the pattern are stored for later
// use by extractTimeFromFilename().
//
// dateFormatMark: attribute value; must contain a '#' before the pattern.
//
// A parse error is reported through THROW_NCML_PARSE_ERROR when there is no
// '#' or when ICU fails to construct either formatter.
// NOTE(review): _pDateFormatters is assigned a fresh object below; any release
// of a previously created instance is not visible in this excerpt -- confirm
// there is no leak when this is called more than once on the same object.
408 void ScanElement::initSimpleDateFormats(
const std::string& dateFormatMark)
412 _pDateFormatters =
new DateFormatters;
413 VALID_PTR(_pDateFormatters);
// Locate the marker separating the filename prefix from the date pattern.
415 _pDateFormatters->_markPos = dateFormatMark.find_last_of(
"#");
416 if (_pDateFormatters->_markPos == string::npos) {
417 THROW_NCML_PARSE_ERROR(line(),
"The scan@dateFormatMark attribute did not contain"
418 " a marking # character before the date format!"
419 " dateFormatMark=\"" + dateFormatMark +
"\"");
// Everything after the '#' is the SimpleDateFormat pattern.
423 string dateFormat = dateFormatMark.substr(_pDateFormatters->_markPos + 1, string::npos);
424 BESDEBUG(
"ncml",
"Using a date format of: " << dateFormat << endl);
425 icu::UnicodeString usDateFormat(dateFormat.c_str());
// Remember the pattern length; it is later used as the length of the date
// portion taken from each filename.
428 _pDateFormatters->_sdfLen = dateFormat.size();
// Formatter for the user-supplied pattern.
431 UErrorCode success = U_ZERO_ERROR;
432 _pDateFormatters->_pDateFormat =
new icu::SimpleDateFormat(usDateFormat, success);
433 if (U_FAILURE(success)) {
434 THROW_NCML_PARSE_ERROR(line(),
"Scan element failed to parse the SimpleDateFormat pattern: " + dateFormat);
436 VALID_PTR(_pDateFormatters->_pDateFormat);
438 _pDateFormatters->_pDateFormat->setTimeZone(*(icu::TimeZone::getGMT()));
// Formatter for the ISO 8601 output; it is constructed with the default
// pattern first and the ISO pattern is applied afterwards.
442 _pDateFormatters->_pISO8601 =
new icu::SimpleDateFormat(success);
443 if (U_FAILURE(success)) {
444 THROW_NCML_PARSE_ERROR(line(),
"Scan element failed to create the ISO 8601 SimpleDateFormat"
445 " using the pattern " + ISO_8601_FORMAT);
447 VALID_PTR(_pDateFormatters->_pISO8601);
449 _pDateFormatters->_pISO8601->setTimeZone(*(icu::TimeZone::getGMT()));
450 _pDateFormatters->_pISO8601->applyPattern(ISO_8601_FORMAT.c_str());
// Extracts the date embedded in a filename and returns it re-formatted with
// the ISO 8601 formatter.
//
// The date text is taken from the filename starting at _markPos (the index of
// '#' within the dateFormatMark attribute) and spanning _sdfLen characters
// (the length of the date pattern). It is parsed with _pDateFormat and the
// resulting date is formatted with _pISO8601.
//
// filename: the file's basename, as passed by getDatasetList().
// Returns the formatted date string (the declaration of `result` and the
// return statement are not visible in this excerpt).
//
// Requires that initSimpleDateFormats() has run: the three VALID_PTR checks
// below enforce it. A parse error is reported through THROW_NCML_PARSE_ERROR
// when ICU cannot parse the date portion.
// NOTE(review): using the pattern length as the date-text length assumes the
// two are equal; a pattern with quoted literals would break that -- confirm.
453 std::string ScanElement::extractTimeFromFilename(
const std::string& filename)
const
455 VALID_PTR(_pDateFormatters);
456 VALID_PTR(_pDateFormatters->_pDateFormat);
457 VALID_PTR(_pDateFormatters->_pISO8601);
// NOTE(review): std::string::substr throws std::out_of_range when _markPos is
// greater than filename.size(); a filename shorter than the mark prefix would
// surface as that exception rather than as a parse error -- confirm handling.
461 string sdfPortion = filename.substr(_pDateFormatters->_markPos, _pDateFormatters->_sdfLen);
// Recover the pattern text; it is only used in the debug and error messages.
463 icu::UnicodeString usPattern;
464 _pDateFormatters->_pDateFormat->toPattern(usPattern);
466 bool conversionSuccess = convertUnicodeStringToStdString(sdfPattern, usPattern);
467 NCML_ASSERT_MSG(conversionSuccess,
468 "ScanElement::extractTimeFromFilename: couldn't convert the UnicodeString date pattern to a std::string!");
471 "Scan is now matching the date portion of the filename " << sdfPortion <<
" to the SimpleDateFormat=" "\"" << sdfPattern <<
"\"" << endl);
// Parse the extracted text into a date value.
473 UErrorCode status = U_ZERO_ERROR;
474 UDate theDate = _pDateFormatters->_pDateFormat->parse(sdfPortion.c_str(), status);
475 if (U_FAILURE(status)) {
476 THROW_NCML_PARSE_ERROR(line(),
"SimpleDateFormat could not parse the pattern="
477 "\"" + sdfPattern +
"\""
478 " on the filename portion=" +
"\"" + sdfPortion +
"\""
479 " of the filename=" +
"\"" + filename +
"\""
480 " Either the pattern was invalid or the filename did not match.");
// Format the parsed date with the ISO 8601 formatter and convert it back to a
// std::string.
483 icu::UnicodeString usISODate;
484 _pDateFormatters->_pISO8601->format(theDate, usISODate);
486 conversionSuccess = convertUnicodeStringToStdString(result, usISODate);
487 NCML_ASSERT_MSG(conversionSuccess,
488 "ScanElement::extractTimeFromFilename: failed to convert the UnicodeString ISO date to a std::string!");
// Releases the DateFormatters helper via SAFE_DELETE. Declared throw(), i.e.
// it promises not to throw. SAFE_DELETE is a project macro; presumably it
// deletes the pointee and nulls the pointer -- TODO confirm.
493 void ScanElement::deleteDateFormats() throw ()
495 SAFE_DELETE(_pDateFormatters);
// Builds the list of attribute names a scan element accepts:
// location, suffix, regExp, subdirs, olderThan, dateFormatMark, enhance and
// ncoords, in that order. The result initializes the static _sValidAttrs.
// "enhance" is accepted here but rejected later by
// throwOnUnhandledAttributes(). The return statement is not visible in this
// excerpt.
498 vector<string> ScanElement::getValidAttributes()
500 vector<string> attrs;
501 attrs.push_back(
"location");
502 attrs.push_back(
"suffix");
503 attrs.push_back(
"regExp");
504 attrs.push_back(
"subdirs");
505 attrs.push_back(
"olderThan");
506 attrs.push_back(
"dateFormatMark");
510 attrs.push_back(
"enhance");
513 attrs.push_back(
"ncoords");
// Rejects attributes that are recognized but not implemented. Only "enhance"
// is checked in the visible code: a non-empty value is reported as a parse
// error through THROW_NCML_PARSE_ERROR with this element's line number.
518 void ScanElement::throwOnUnhandledAttributes()
520 if (!_enhance.empty()) {
521 THROW_NCML_PARSE_ERROR(line(),
"ScanElement: Sorry, enhance attribute is not yet supported.");
// Formats a time_t as a UTC date/time string using strftime with "%F %T".
//
// theTime: the time value to format.
// Returns the formatted text (the buffer declaration and the return statement
// are not visible in this excerpt; 128 is the buffer size passed to strftime).
// NOTE(review): gmtime() returns a pointer to shared static storage and may
// return null on failure; the pointer is used without a check here -- confirm
// this is acceptable where the function is called.
525 std::string ScanElement::getTimeAsString(time_t theTime)
527 struct tm* pTM = gmtime(&theTime);
530 strftime(buf, 128,
"%F %T", pTM);