bes  Updated for version 3.20.6
GatewayUtils.cc
1 // GatewayUtils.cc
2 
3 // -*- mode: c++; c-basic-offset:4 -*-
4 
5 // This file is part of gateway_module, A C++ module that can be loaded in to
6 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
7 
8 // Copyright (c) 2002,2003 OPeNDAP, Inc.
9 // Author: Patrick West <pwest@ucar.edu>
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
26 
27 // Authors:
28 // pcw Patrick West <pwest@ucar.edu>
29 
30 #include "config.h"
31 
32 #ifdef HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <cstdlib>
36 #include <cstring>
37 #include <curl/curl.h>
38 
39 #include "GatewayUtils.h"
40 #include "GatewayResponseNames.h"
41 
42 #include <BESUtil.h>
43 #include <BESCatalogUtils.h>
44 #include <BESCatalogList.h>
45 #include <BESCatalog.h>
46 #include <BESRegex.h>
47 #include <TheBESKeys.h>
48 
49 #include <BESInternalError.h>
50 #include <BESDapError.h>
51 #include <BESSyntaxUserError.h>
52 #include <BESDebug.h>
53 
54 #include <GNURegex.h>
55 #include <util.h>
56 
57 using namespace libdap;
58 using namespace gateway;
59 
60 #if 0
61 std::vector<string> GatewayUtils::WhiteList;
62 #endif
63 std::map<string, string> GatewayUtils::MimeList;
64 string GatewayUtils::ProxyProtocol;
65 string GatewayUtils::ProxyHost;
66 string GatewayUtils::ProxyUser;
67 string GatewayUtils::ProxyPassword;
68 string GatewayUtils::ProxyUserPW;
69 int GatewayUtils::ProxyPort = 0;
70 int GatewayUtils::ProxyAuthType = 0;
71 bool GatewayUtils::useInternalCache = false;
72 
73 string GatewayUtils::NoProxyRegex;
74 
75 // Initialization routine for the gateway module for certain parameters
76 // and keys, like the white list, the MimeTypes translation.
77 void GatewayUtils::Initialize()
78 {
79 #if 0
80  // Whitelist - list of domain that the gateway is allowed to
81  // communicate with.
82  bool found = false;
83  string key = Gateway_WHITELIST;
84  TheBESKeys::TheKeys()->get_values(key, WhiteList, found);
85  if (!found || WhiteList.size() == 0) {
86  string err = (string) "The parameter " + Gateway_WHITELIST + " is not set or has no values in the gateway"
87  + " configuration file";
88  throw BESSyntaxUserError(err, __FILE__, __LINE__);
89 
90  }
91 #endif
92 
93  // MimeTypes - translate from a mime type to a module name
94  bool found = false;
95  std::string key = Gateway_MIMELIST;
96  std::vector<string> vals;
97  TheBESKeys::TheKeys()->get_values(key, vals, found);
98  if (found && vals.size()) {
99  std::vector<string>::iterator i = vals.begin();
100  std::vector<string>::iterator e = vals.end();
101  for (; i != e; i++) {
102  size_t colon = (*i).find(":");
103  if (colon == string::npos) {
104  string err = (string) "Malformed " + Gateway_MIMELIST + " " + (*i)
105  + " specified in the gateway configuration";
106  throw BESSyntaxUserError(err, __FILE__, __LINE__);
107  }
108  string mod = (*i).substr(0, colon);
109  string mime = (*i).substr(colon + 1);
110  MimeList[mod] = mime;
111  }
112  }
113 
114  found = false;
115  key = Gateway_PROXYHOST;
116  TheBESKeys::TheKeys()->get_value(key, GatewayUtils::ProxyHost, found);
117  if (found && !GatewayUtils::ProxyHost.empty()) {
118  // if the proxy host is set, then check to see if the port is
119  // set. Does not need to be.
120  found = false;
121  key = Gateway_PROXYPORT;
122  string port;
123  TheBESKeys::TheKeys()->get_value(key, port, found);
124  if (found && !port.empty()) {
125  GatewayUtils::ProxyPort = atoi(port.c_str());
126  if (!GatewayUtils::ProxyPort) {
127  string err = (string) "gateway proxy host specified," + " but proxy port specified is invalid";
128  throw BESSyntaxUserError(err, __FILE__, __LINE__);
129  }
130  }
131 
132  // @TODO Either use this or remove it - right now this variable is never used downstream
133  // find the protocol to use for the proxy server. If none set,
134  // default to http
135  found = false;
136  key = Gateway_PROXYPROTOCOL;
137  TheBESKeys::TheKeys()->get_value(key, GatewayUtils::ProxyProtocol, found);
138  if (!found || GatewayUtils::ProxyProtocol.empty()) {
139  GatewayUtils::ProxyProtocol = "http";
140  }
141 
142  // find the user to use for authenticating with the proxy server. If none set,
143  // default to ""
144  found = false;
145  key = Gateway_PROXYUSER;
146  TheBESKeys::TheKeys()->get_value(key, GatewayUtils::ProxyUser, found);
147  if (!found) {
148  GatewayUtils::ProxyUser = "";
149  }
150 
151  // find the password to use for authenticating with the proxy server. If none set,
152  // default to ""
153  found = false;
154  key = Gateway_PROXYPASSWORD;
155  TheBESKeys::TheKeys()->get_value(key, GatewayUtils::ProxyPassword, found);
156  if (!found) {
157  GatewayUtils::ProxyPassword = "";
158  }
159 
160  // find the user:password string to use for authenticating with the proxy server. If none set,
161  // default to ""
162  found = false;
163  key = Gateway_PROXYUSERPW;
164  TheBESKeys::TheKeys()->get_value(key, GatewayUtils::ProxyUserPW, found);
165  if (!found) {
166  GatewayUtils::ProxyUserPW = "";
167  }
168 
169  // find the authentication mechanism to use with the proxy server. If none set,
170  // default to BASIC authentication.
171  found = false;
172  key = Gateway_PROXYAUTHTYPE;
173  string authType;
174  TheBESKeys::TheKeys()->get_value(key, authType, found);
175  if (found) {
176  authType = BESUtil::lowercase(authType);
177  if (authType == "basic") {
178  GatewayUtils::ProxyAuthType = CURLAUTH_BASIC;
179  BESDEBUG("gateway", "GatewayUtils::Initialize() - ProxyAuthType BASIC set." << endl);
180  }
181  else if (authType == "digest") {
182  GatewayUtils::ProxyAuthType = CURLAUTH_DIGEST;
183  BESDEBUG("gateway", "GatewayUtils::Initialize() - ProxyAuthType DIGEST set." << endl);
184  }
185 
186  else if (authType == "ntlm") {
187  GatewayUtils::ProxyAuthType = CURLAUTH_NTLM;
188  BESDEBUG("gateway", "GatewayUtils::Initialize() - ProxyAuthType NTLM set." << endl);
189  }
190  else {
191  GatewayUtils::ProxyAuthType = CURLAUTH_BASIC;
192  BESDEBUG("gateway",
193  "GatewayUtils::Initialize() - User supplied an invalid value '"<< authType << "' for Gateway.ProxyAuthType. Falling back to BASIC authentication scheme." << endl);
194  }
195 
196  }
197  else {
198  GatewayUtils::ProxyAuthType = CURLAUTH_BASIC;
199  }
200 
201  }
202 
203  found = false;
204  key = Gateway_USE_INTERNAL_CACHE;
205  string use_cache;
206  TheBESKeys::TheKeys()->get_value(key, use_cache, found);
207  if (found) {
208  if (use_cache == "true" || use_cache == "TRUE" || use_cache == "True" || use_cache == "yes"
209  || use_cache == "YES" || use_cache == "Yes")
210  GatewayUtils::useInternalCache = true;
211  else
212  GatewayUtils::useInternalCache = false;
213  }
214  else {
215  // If not set, default to false. Assume squid or ...
216  GatewayUtils::useInternalCache = false;
217  }
218 
219  // Grab the value for the NoProxy regex; empty if there is none.
220  found = false; // Not used
221  TheBESKeys::TheKeys()->get_value("Gateway.NoProxy", GatewayUtils::NoProxyRegex, found);
222 }
223 
224 // Not used. There's a better version of this that returns a string in libdap.
225 // jhrg 3/24/11
226 
227 #if 0
228 // Look around for a reasonable place to put a temporary file. Check first
229 // the value of the TMPDIR env var. If that does not yield a path that's
230 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
231 // defined in stdio.h. If both come up empty, then use `./'.
232 //
233 // This function allocates storage using new. The caller must delete the char
234 // array.
235 
236 // Change this to a version that either returns a string or an open file
237 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
238 // (see open()) to make it more secure. Ideal solution: get deserialize()
239 // methods to read from a stream returned by libcurl, not from a temporary
240 // file. 9/21/07 jhrg
241 char *
242 GatewayUtils::Get_tempfile_template( char *file_template )
243 {
244 #ifdef WIN32
245  // white list for a WIN32 directory
246  Regex directory("[-a-zA-Z0-9_\\]*");
247 
248  string c = getenv("TEMP") ? getenv("TEMP") : "";
249  if (!c.empty() && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
250  goto valid_temp_directory;
251 
252  c = getenv("TMP") ? getenv("TMP") : "";
253  if (!c.empty() && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
254  goto valid_temp_directory;
255 #else
256  // white list for a directory
257  Regex directory("[-a-zA-Z0-9_/]*");
258 
259  string c = getenv("TMPDIR") ? getenv("TMPDIR") : "";
260  if (!c.empty() && directory.match(c.c_str(), c.length())
261  && (access(c.c_str(), W_OK | R_OK) == 0))
262  goto valid_temp_directory;
263 
264 #ifdef P_tmpdir
265  if (access(P_tmpdir, W_OK | R_OK) == 0) {
266  c = P_tmpdir;
267  goto valid_temp_directory;
268  }
269 #endif
270 
271 #endif // WIN32
272  c = ".";
273 
274  valid_temp_directory:
275 
276 #ifdef WIN32
277  c.append("\\");
278 #else
279  c.append("/");
280 #endif
281  c.append(file_template);
282 
283  char *temp = new char[c.length() + 1];
284  strncpy(temp, c.c_str(), c.length());
285  temp[c.length()] = '\0';
286 
287  return temp;
288 }
289 #endif
290 
304 void GatewayUtils::Get_type_from_disposition(const string &disp, string &type)
305 {
306  // If this function extracts a filename from disp and it matches a handler's
307  // regex using the Catalog Utils, this will be set to a non-empty value.
308  type = "";
309 
310  size_t fnpos = disp.find("filename");
311  if (fnpos != string::npos) {
312  // Got the filename attribute, now get the
313  // filename, which is after the pound sign (#) or the equal sign (=)
314  size_t pos = disp.find("#", fnpos);
315  if (pos == string::npos) pos = disp.find("=", fnpos);
316 
317  if (pos != string::npos) {
318  // Got the filename to the end of the
319  // string, now get it to either the end of
320  // the string or the start of the next
321  // attribute
322  string filename;
323  size_t sp = disp.find(" ", pos);
324  if (pos != string::npos) {
325  // space before the next attribute
326  filename = disp.substr(pos + 1, sp - pos - 1);
327  }
328  else {
329  // to the end of the string
330  filename = disp.substr(pos + 1);
331  }
332 
333  // now see if it's wrapped in quotes
334  if (filename[0] == '"') {
335  filename = filename.substr(1);
336  }
337  if (filename[filename.length() - 1] == '"') {
338  filename = filename.substr(0, filename.length() - 1);
339  }
340 
341  // we have the filename now, run it through
342  // the type match to get the file type
343 #if 0
344  const BESCatalogUtils *utils = BESCatalogUtils::Utils(BESCatalogList::TheCatalogList()->default_catalog_name());
345 #endif
346 
348  type = utils->get_handler_name(filename);
349 
350 #if 0
351  BESCatalogUtils::match_citer i = utils->match_list_begin();
352  BESCatalogUtils::match_citer ie = utils->match_list_end();
353  bool done = false;
354  for (; i != ie && !done; i++) {
355  BESCatalogUtils::handler_regex match = (*i);
356  try {
357  BESDEBUG("gateway",
358  " Comparing disp filename " << filename << " against expr " << match.regex << endl);
359  BESRegex reg_expr(match.regex.c_str());
360  if (reg_expr.match(filename.c_str(), filename.length()) == static_cast<int>(filename.length())) {
361  type = match.handler;
362  done = true;
363  }
364  }
365  // This will not catch the error throw by BESRegex() - that is an BESInteranlError.
366  // BESRegex::match does not throw. jhrg 7/27/18
367  catch (Error &e) {
368  string serr = (string) "Unable to match data type, " + "malformed Catalog TypeMatch parameter "
369  + "in bes configuration file around " + match.regex + ": " + e.get_error_message();
370  throw BESDapError(serr, false, e.get_error_code(), __FILE__, __LINE__);
371  }
372  }
373 #endif
374  }
375  }
376 }
377 
378 void GatewayUtils::Get_type_from_content_type(const string &ctype, string &type)
379 {
380  BESDEBUG("gateway", "GatewayUtils::Get_type_from_content_type() - BEGIN" << endl);
381  map<string, string>::iterator i = MimeList.begin();
382  map<string, string>::iterator e = MimeList.end();
383  bool done = false;
384  for (; i != e && !done; i++) {
385  BESDEBUG("gateway",
386  "GatewayUtils::Get_type_from_content_type() - Comparing content type '" << ctype << "' against mime list element '" << (*i).second << "'"<< endl);
387  BESDEBUG("gateway",
388  "GatewayUtils::Get_type_from_content_type() - first: " << (*i).first << " second: " << (*i).second << endl);
389 
390  if ((*i).second == ctype) {
391 
392  BESDEBUG("gateway", "GatewayUtils::Get_type_from_content_type() - MATCH" << endl);
393 
394  type = (*i).first;
395  done = true;
396  }
397  }
398  BESDEBUG("gateway", "GatewayUtils::Get_type_from_content_type() - END" << endl);
399 }
400 
401 void GatewayUtils::Get_type_from_url(const string &url, string &type)
402 {
403  // just run the url through the type match from the configuration
404 #if 0
405  const BESCatalogUtils *utils = BESCatalogUtils::Utils(BESCatalogList::TheCatalogList()->default_catalog_name());
406 #endif
408  type = utils->get_handler_name(url);
409 
410 #if 0
411  BESCatalogUtils::match_citer i = utils->match_list_begin();
412  BESCatalogUtils::match_citer ie = utils->match_list_end();
413  bool done = false;
414  for (; i != ie && !done; i++) {
415  BESCatalogUtils::handler_regex match = (*i);
416  try {
417  BESDEBUG("gateway",
418  "GatewayUtils::Get_type_from_url() - Comparing url " << url << " against type match expr " << match.regex << endl);
419  BESRegex reg_expr(match.regex.c_str());
420  if (reg_expr.match(url.c_str(), url.length()) == static_cast<int>(url.length())) {
421  type = match.handler;
422  done = true;
423  BESDEBUG("gateway", "GatewayUtils::Get_type_from_url() - MATCH type: " << type << endl);
424  }
425  }
426  catch (Error &e) {
427  string serr = (string) "Unable to match data type, " + "malformed Catalog TypeMatch parameter "
428  + "in bes configuration file around " + match.regex + ": " + e.get_error_message();
429  throw BESInternalError(serr, __FILE__, __LINE__);
430  }
431  }
432 #endif
433 
434 }
435 
436 #if 0
437 bool GatewayUtils::Is_Whitelisted(const std::string &url){
438  bool whitelisted = false;
439  std::vector<std::string>::const_iterator i = WhiteList.begin();
440  std::vector<std::string>::const_iterator e = WhiteList.end();
441  for (; i != e && !whitelisted; i++) {
442  if ((*i).length() <= url.length()) {
443  if (url.substr(0, (*i).length()) == (*i)) {
444  whitelisted = true;
445  }
446  }
447  }
448  return whitelisted;
449 }
450 
451 #endif
452 
453 
454 
455 
BESCatalogUtils
Definition: BESCatalogUtils.h:61
BESCatalogList::default_catalog
virtual BESCatalog * default_catalog() const
The the default catalog.
Definition: BESCatalogList.h:118
libdap
Definition: BESDapFunctionResponseCache.h:35
BESCatalogList::TheCatalogList
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Definition: BESCatalogList.cc:81
TheBESKeys::TheKeys
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:62
BESCatalogUtils::get_handler_name
std::string get_handler_name(const std::string &item) const
Find the handler name that will process.
Definition: BESCatalogUtils.cc:422
BESSyntaxUserError
error thrown if there is a user syntax error in the request or any other user error
Definition: BESSyntaxUserError.h:41
BESInternalError
exception thrown if internal error encountered
Definition: BESInternalError.h:43
BESCatalog::get_catalog_utils
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
TheBESKeys::get_value
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:272
TheBESKeys::get_values
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:303
Error
BESUtil::lowercase
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
BESRegex
Definition: BESRegex.h:41
BESDapError
error object created from libdap error objects and can handle those errors
Definition: BESDapError.h:59
BESRegex::match
int match(const char *s, int len, int pos=0)
Does the pattern match.
Definition: BESRegex.cc:105