pion-net
4.0.9
|
00001 // ------------------------------------------------------------------ 00002 // pion-net: a C++ framework for building lightweight HTTP interfaces 00003 // ------------------------------------------------------------------ 00004 // Copyright (C) 2007-2008 Atomic Labs, Inc. (http://www.atomiclabs.com) 00005 // 00006 // Distributed under the Boost Software License, Version 1.0. 00007 // See http://www.boost.org/LICENSE_1_0.txt 00008 // 00009 00010 #include <cstdlib> 00011 #include <boost/regex.hpp> 00012 #include <boost/logic/tribool.hpp> 00013 #include <pion/net/HTTPParser.hpp> 00014 #include <pion/net/HTTPRequest.hpp> 00015 #include <pion/net/HTTPResponse.hpp> 00016 #include <pion/net/HTTPMessage.hpp> 00017 00018 00019 namespace pion { // begin namespace pion 00020 namespace net { // begin namespace net (Pion Network Library) 00021 00022 00023 // static members of HTTPParser 00024 00025 const boost::uint32_t HTTPParser::STATUS_MESSAGE_MAX = 1024; // 1 KB 00026 const boost::uint32_t HTTPParser::METHOD_MAX = 1024; // 1 KB 00027 const boost::uint32_t HTTPParser::RESOURCE_MAX = 256 * 1024; // 256 KB 00028 const boost::uint32_t HTTPParser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB 00029 const boost::uint32_t HTTPParser::HEADER_NAME_MAX = 1024; // 1 KB 00030 const boost::uint32_t HTTPParser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB 00031 const boost::uint32_t HTTPParser::QUERY_NAME_MAX = 1024; // 1 KB 00032 const boost::uint32_t HTTPParser::QUERY_VALUE_MAX = 1024 * 1024; // 1 MB 00033 const boost::uint32_t HTTPParser::COOKIE_NAME_MAX = 1024; // 1 KB 00034 const boost::uint32_t HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB 00035 const std::size_t HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024; // 1 MB 00036 HTTPParser::ErrorCategory * HTTPParser::m_error_category_ptr = NULL; 00037 boost::once_flag HTTPParser::m_instance_flag = BOOST_ONCE_INIT; 00038 00039 00040 // HTTPParser member functions 00041 00042 boost::tribool HTTPParser::parse(HTTPMessage& http_msg, 00043 boost::system::error_code& ec) 00044 { 00045 PION_ASSERT(! eof() ); 00046 00047 boost::tribool rc = boost::indeterminate; 00048 std::size_t total_bytes_parsed = 0; 00049 00050 if(http_msg.hasMissingPackets()) { 00051 http_msg.setDataAfterMissingPacket(true); 00052 } 00053 00054 do { 00055 switch (m_message_parse_state) { 00056 // just started parsing the HTTP message 00057 case PARSE_START: 00058 m_message_parse_state = PARSE_HEADERS; 00059 // step through to PARSE_HEADERS 00060 00061 // parsing the HTTP headers 00062 case PARSE_HEADERS: 00063 rc = parseHeaders(http_msg, ec); 00064 total_bytes_parsed += m_bytes_last_read; 00065 // check if we have finished parsing HTTP headers 00066 if (rc == true) { 00067 // finishHeaderParsing() updates m_message_parse_state 00068 rc = finishHeaderParsing(http_msg, ec); 00069 } 00070 break; 00071 00072 // parsing chunked payload content 00073 case PARSE_CHUNKS: 00074 rc = parseChunks(http_msg.getChunkCache(), ec); 00075 total_bytes_parsed += m_bytes_last_read; 00076 // check if we have finished parsing all chunks 00077 if (rc == true) { 00078 http_msg.concatenateChunks(); 00079 } 00080 break; 00081 00082 // parsing regular payload content with a known length 00083 case PARSE_CONTENT: 00084 rc = consumeContent(http_msg, ec); 00085 total_bytes_parsed += m_bytes_last_read; 00086 break; 00087 00088 // parsing payload content with no length (until EOF) 00089 case PARSE_CONTENT_NO_LENGTH: 00090 consumeContentAsNextChunk(http_msg.getChunkCache()); 00091 total_bytes_parsed += m_bytes_last_read; 00092 break; 00093 00094 // finished parsing the HTTP message 00095 case PARSE_END: 00096 rc = true; 00097 break; 00098 } 00099 } while ( boost::indeterminate(rc) && ! eof() ); 00100 00101 // check if we've finished parsing the HTTP message 00102 if (rc == true) { 00103 m_message_parse_state = PARSE_END; 00104 finish(http_msg); 00105 } else if(rc == false) { 00106 computeMsgStatus(http_msg, false); 00107 } 00108 00109 // update bytes last read (aggregate individual operations for caller) 00110 m_bytes_last_read = total_bytes_parsed; 00111 00112 return rc; 00113 } 00114 00115 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg, 00116 std::size_t len, boost::system::error_code& ec) 00117 { 00118 static const char MISSING_DATA_CHAR = 'X'; 00119 boost::tribool rc = boost::indeterminate; 00120 00121 http_msg.setMissingPackets(true); 00122 00123 switch (m_message_parse_state) { 00124 00125 // cannot recover from missing data while parsing HTTP headers 00126 case PARSE_START: 00127 case PARSE_HEADERS: 00128 setError(ec, ERROR_MISSING_HEADER_DATA); 00129 rc = false; 00130 break; 00131 00132 // parsing chunked payload content 00133 case PARSE_CHUNKS: 00134 // parsing chunk data -> we can only recover if data fits into current chunk 00135 if (m_chunked_content_parse_state == PARSE_CHUNK 00136 && m_bytes_read_in_current_chunk < m_size_of_current_chunk 00137 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len) 00138 { 00139 // use dummy content for missing data 00140 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 00141 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR); 00142 00143 m_bytes_read_in_current_chunk += len; 00144 m_bytes_last_read = len; 00145 m_bytes_total_read += len; 00146 m_bytes_content_read += len; 00147 00148 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) { 00149 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK; 00150 } 00151 } else { 00152 // cannot recover from missing data 00153 setError(ec, ERROR_MISSING_CHUNK_DATA); 00154 rc = false; 00155 } 00156 break; 00157 00158 // parsing regular payload content with a known length 00159 case PARSE_CONTENT: 00160 // parsing content (with length) -> we can only recover if data fits into content 00161 if (m_bytes_content_remaining == 0) { 00162 // we have all of the remaining payload content 00163 rc = true; 00164 } else if (m_bytes_content_remaining < len) { 00165 // cannot recover from missing data 00166 setError(ec, ERROR_MISSING_TOO_MUCH_CONTENT); 00167 rc = false; 00168 } else { 00169 00170 // make sure content buffer is not already full 00171 if ( (m_bytes_content_read+len) <= m_max_content_length) { 00172 // use dummy content for missing data 00173 for (std::size_t n = 0; n < len; ++n) 00174 http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR; 00175 } else { 00176 m_bytes_content_read += len; 00177 } 00178 00179 m_bytes_content_remaining -= len; 00180 m_bytes_total_read += len; 00181 m_bytes_last_read = len; 00182 00183 if (m_bytes_content_remaining == 0) 00184 rc = true; 00185 } 00186 break; 00187 00188 // parsing payload content with no length (until EOF) 00189 case PARSE_CONTENT_NO_LENGTH: 00190 // use dummy content for missing data 00191 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 00192 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR); 00193 m_bytes_last_read = len; 00194 m_bytes_total_read += len; 00195 m_bytes_content_read += len; 00196 break; 00197 00198 // finished parsing the HTTP message 00199 case PARSE_END: 00200 rc = true; 00201 break; 00202 } 00203 00204 // check if we've finished parsing the HTTP message 00205 if (rc == true) { 00206 m_message_parse_state = PARSE_END; 00207 finish(http_msg); 00208 } else if(rc == false) { 00209 computeMsgStatus(http_msg, false); 00210 } 00211 00212 return rc; 00213 } 00214 00215 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg, 00216 boost::system::error_code& ec) 00217 { 00218 // 00219 // note that boost::tribool may have one of THREE states: 00220 // 00221 // false: encountered an error while parsing HTTP headers 00222 // true: finished successfully parsing the HTTP headers 00223 // indeterminate: parsed bytes, but the HTTP headers are not yet finished 00224 // 00225 const char *read_start_ptr = m_read_ptr; 00226 m_bytes_last_read = 0; 00227 while (m_read_ptr < m_read_end_ptr) { 00228 00229 if (m_save_raw_headers) 00230 m_raw_headers += *m_read_ptr; 00231 00232 switch (m_headers_parse_state) { 00233 case PARSE_METHOD_START: 00234 // we have not yet started parsing the HTTP method string 00235 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace 00236 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00237 setError(ec, ERROR_METHOD_CHAR); 00238 return false; 00239 } 00240 m_headers_parse_state = PARSE_METHOD; 00241 m_method.erase(); 00242 m_method.push_back(*m_read_ptr); 00243 } 00244 break; 00245 00246 case PARSE_METHOD: 00247 // we have started parsing the HTTP method string 00248 if (*m_read_ptr == ' ') { 00249 m_resource.erase(); 00250 m_headers_parse_state = PARSE_URI_STEM; 00251 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00252 setError(ec, ERROR_METHOD_CHAR); 00253 return false; 00254 } else if (m_method.size() >= METHOD_MAX) { 00255 setError(ec, ERROR_METHOD_SIZE); 00256 return false; 00257 } else { 00258 m_method.push_back(*m_read_ptr); 00259 } 00260 break; 00261 00262 case PARSE_URI_STEM: 00263 // we have started parsing the URI stem (or resource name) 00264 if (*m_read_ptr == ' ') { 00265 m_headers_parse_state = PARSE_HTTP_VERSION_H; 00266 } else if (*m_read_ptr == '?') { 00267 m_query_string.erase(); 00268 m_headers_parse_state = PARSE_URI_QUERY; 00269 } else if (*m_read_ptr == '\r') { 00270 http_msg.setVersionMajor(0); 00271 http_msg.setVersionMinor(0); 00272 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00273 } else if (*m_read_ptr == '\n') { 00274 http_msg.setVersionMajor(0); 00275 http_msg.setVersionMinor(0); 00276 m_headers_parse_state = PARSE_EXPECTING_CR; 00277 } else if (isControl(*m_read_ptr)) { 00278 setError(ec, ERROR_URI_CHAR); 00279 return false; 00280 } else if (m_resource.size() >= RESOURCE_MAX) { 00281 setError(ec, ERROR_URI_SIZE); 00282 return false; 00283 } else { 00284 m_resource.push_back(*m_read_ptr); 00285 } 00286 break; 00287 00288 case PARSE_URI_QUERY: 00289 // we have started parsing the URI query string 00290 if (*m_read_ptr == ' ') { 00291 m_headers_parse_state = PARSE_HTTP_VERSION_H; 00292 } else if (isControl(*m_read_ptr)) { 00293 setError(ec, ERROR_QUERY_CHAR); 00294 return false; 00295 } else if (m_query_string.size() >= QUERY_STRING_MAX) { 00296 setError(ec, ERROR_QUERY_SIZE); 00297 return false; 00298 } else { 00299 m_query_string.push_back(*m_read_ptr); 00300 } 00301 break; 00302 00303 case PARSE_HTTP_VERSION_H: 00304 // parsing "HTTP" 00305 if (*m_read_ptr == '\r') { 00306 // should only happen for requests (no HTTP/VERSION specified) 00307 if (! m_is_request) { 00308 setError(ec, ERROR_VERSION_EMPTY); 00309 return false; 00310 } 00311 http_msg.setVersionMajor(0); 00312 http_msg.setVersionMinor(0); 00313 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00314 } else if (*m_read_ptr == '\n') { 00315 // should only happen for requests (no HTTP/VERSION specified) 00316 if (! m_is_request) { 00317 setError(ec, ERROR_VERSION_EMPTY); 00318 return false; 00319 } 00320 http_msg.setVersionMajor(0); 00321 http_msg.setVersionMinor(0); 00322 m_headers_parse_state = PARSE_EXPECTING_CR; 00323 } else if (*m_read_ptr != 'H') { 00324 setError(ec, ERROR_VERSION_CHAR); 00325 return false; 00326 } 00327 m_headers_parse_state = PARSE_HTTP_VERSION_T_1; 00328 break; 00329 00330 case PARSE_HTTP_VERSION_T_1: 00331 // parsing "HTTP" 00332 if (*m_read_ptr != 'T') { 00333 setError(ec, ERROR_VERSION_CHAR); 00334 return false; 00335 } 00336 m_headers_parse_state = PARSE_HTTP_VERSION_T_2; 00337 break; 00338 00339 case PARSE_HTTP_VERSION_T_2: 00340 // parsing "HTTP" 00341 if (*m_read_ptr != 'T') { 00342 setError(ec, ERROR_VERSION_CHAR); 00343 return false; 00344 } 00345 m_headers_parse_state = PARSE_HTTP_VERSION_P; 00346 break; 00347 00348 case PARSE_HTTP_VERSION_P: 00349 // parsing "HTTP" 00350 if (*m_read_ptr != 'P') { 00351 setError(ec, ERROR_VERSION_CHAR); 00352 return false; 00353 } 00354 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH; 00355 break; 00356 00357 case PARSE_HTTP_VERSION_SLASH: 00358 // parsing slash after "HTTP" 00359 if (*m_read_ptr != '/') { 00360 setError(ec, ERROR_VERSION_CHAR); 00361 return false; 00362 } 00363 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START; 00364 break; 00365 00366 case PARSE_HTTP_VERSION_MAJOR_START: 00367 // parsing the first digit of the major version number 00368 if (!isDigit(*m_read_ptr)) { 00369 setError(ec, ERROR_VERSION_CHAR); 00370 return false; 00371 } 00372 http_msg.setVersionMajor(*m_read_ptr - '0'); 00373 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR; 00374 break; 00375 00376 case PARSE_HTTP_VERSION_MAJOR: 00377 // parsing the major version number (not first digit) 00378 if (*m_read_ptr == '.') { 00379 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START; 00380 } else if (isDigit(*m_read_ptr)) { 00381 http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10) 00382 + (*m_read_ptr - '0') ); 00383 } else { 00384 setError(ec, ERROR_VERSION_CHAR); 00385 return false; 00386 } 00387 break; 00388 00389 case PARSE_HTTP_VERSION_MINOR_START: 00390 // parsing the first digit of the minor version number 00391 if (!isDigit(*m_read_ptr)) { 00392 setError(ec, ERROR_VERSION_CHAR); 00393 return false; 00394 } 00395 http_msg.setVersionMinor(*m_read_ptr - '0'); 00396 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR; 00397 break; 00398 00399 case PARSE_HTTP_VERSION_MINOR: 00400 // parsing the major version number (not first digit) 00401 if (*m_read_ptr == ' ') { 00402 // ignore trailing spaces after version in request 00403 if (! m_is_request) { 00404 m_headers_parse_state = PARSE_STATUS_CODE_START; 00405 } 00406 } else if (*m_read_ptr == '\r') { 00407 // should only happen for requests 00408 if (! m_is_request) { 00409 setError(ec, ERROR_STATUS_EMPTY); 00410 return false; 00411 } 00412 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00413 } else if (*m_read_ptr == '\n') { 00414 // should only happen for requests 00415 if (! m_is_request) { 00416 setError(ec, ERROR_STATUS_EMPTY); 00417 return false; 00418 } 00419 m_headers_parse_state = PARSE_EXPECTING_CR; 00420 } else if (isDigit(*m_read_ptr)) { 00421 http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10) 00422 + (*m_read_ptr - '0') ); 00423 } else { 00424 setError(ec, ERROR_VERSION_CHAR); 00425 return false; 00426 } 00427 break; 00428 00429 case PARSE_STATUS_CODE_START: 00430 // parsing the first digit of the response status code 00431 if (!isDigit(*m_read_ptr)) { 00432 setError(ec, ERROR_STATUS_CHAR); 00433 return false; 00434 } 00435 m_status_code = (*m_read_ptr - '0'); 00436 m_headers_parse_state = PARSE_STATUS_CODE; 00437 break; 00438 00439 case PARSE_STATUS_CODE: 00440 // parsing the response status code (not first digit) 00441 if (*m_read_ptr == ' ') { 00442 m_status_message.erase(); 00443 m_headers_parse_state = PARSE_STATUS_MESSAGE; 00444 } else if (isDigit(*m_read_ptr)) { 00445 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') ); 00446 } else if (*m_read_ptr == '\r') { 00447 // recover from status message not sent 00448 m_status_message.erase(); 00449 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00450 } else if (*m_read_ptr == '\n') { 00451 // recover from status message not sent 00452 m_status_message.erase(); 00453 m_headers_parse_state = PARSE_EXPECTING_CR; 00454 } else { 00455 setError(ec, ERROR_STATUS_CHAR); 00456 return false; 00457 } 00458 break; 00459 00460 case PARSE_STATUS_MESSAGE: 00461 // parsing the response status message 00462 if (*m_read_ptr == '\r') { 00463 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00464 } else if (*m_read_ptr == '\n') { 00465 m_headers_parse_state = PARSE_EXPECTING_CR; 00466 } else if (isControl(*m_read_ptr)) { 00467 setError(ec, ERROR_STATUS_CHAR); 00468 return false; 00469 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) { 00470 setError(ec, ERROR_STATUS_CHAR); 00471 return false; 00472 } else { 00473 m_status_message.push_back(*m_read_ptr); 00474 } 00475 break; 00476 00477 case PARSE_EXPECTING_NEWLINE: 00478 // we received a CR; expecting a newline to follow 00479 if (*m_read_ptr == '\n') { 00480 m_headers_parse_state = PARSE_HEADER_START; 00481 } else if (*m_read_ptr == '\r') { 00482 // we received two CR's in a row 00483 // assume CR only is (incorrectly) being used for line termination 00484 // therefore, the message is finished 00485 ++m_read_ptr; 00486 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00487 m_bytes_total_read += m_bytes_last_read; 00488 return true; 00489 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00490 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00491 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00492 setError(ec, ERROR_HEADER_CHAR); 00493 return false; 00494 } else { 00495 // assume it is the first character for the name of a header 00496 m_header_name.erase(); 00497 m_header_name.push_back(*m_read_ptr); 00498 m_headers_parse_state = PARSE_HEADER_NAME; 00499 } 00500 break; 00501 00502 case PARSE_EXPECTING_CR: 00503 // we received a newline without a CR 00504 if (*m_read_ptr == '\r') { 00505 m_headers_parse_state = PARSE_HEADER_START; 00506 } else if (*m_read_ptr == '\n') { 00507 // we received two newlines in a row 00508 // assume newline only is (incorrectly) being used for line termination 00509 // therefore, the message is finished 00510 ++m_read_ptr; 00511 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00512 m_bytes_total_read += m_bytes_last_read; 00513 return true; 00514 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00515 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00516 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00517 setError(ec, ERROR_HEADER_CHAR); 00518 return false; 00519 } else { 00520 // assume it is the first character for the name of a header 00521 m_header_name.erase(); 00522 m_header_name.push_back(*m_read_ptr); 00523 m_headers_parse_state = PARSE_HEADER_NAME; 00524 } 00525 break; 00526 00527 case PARSE_HEADER_WHITESPACE: 00528 // parsing whitespace before a header name 00529 if (*m_read_ptr == '\r') { 00530 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00531 } else if (*m_read_ptr == '\n') { 00532 m_headers_parse_state = PARSE_EXPECTING_CR; 00533 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') { 00534 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) 00535 setError(ec, ERROR_HEADER_CHAR); 00536 return false; 00537 // assume it is the first character for the name of a header 00538 m_header_name.erase(); 00539 m_header_name.push_back(*m_read_ptr); 00540 m_headers_parse_state = PARSE_HEADER_NAME; 00541 } 00542 break; 00543 00544 case PARSE_HEADER_START: 00545 // parsing the start of a new header 00546 if (*m_read_ptr == '\r') { 00547 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE; 00548 } else if (*m_read_ptr == '\n') { 00549 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR; 00550 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00551 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00552 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00553 setError(ec, ERROR_HEADER_CHAR); 00554 return false; 00555 } else { 00556 // first character for the name of a header 00557 m_header_name.erase(); 00558 m_header_name.push_back(*m_read_ptr); 00559 m_headers_parse_state = PARSE_HEADER_NAME; 00560 } 00561 break; 00562 00563 case PARSE_HEADER_NAME: 00564 // parsing the name of a header 00565 if (*m_read_ptr == ':') { 00566 m_header_value.erase(); 00567 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE; 00568 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00569 setError(ec, ERROR_HEADER_CHAR); 00570 return false; 00571 } else if (m_header_name.size() >= HEADER_NAME_MAX) { 00572 setError(ec, ERROR_HEADER_NAME_SIZE); 00573 return false; 00574 } else { 00575 // character (not first) for the name of a header 00576 m_header_name.push_back(*m_read_ptr); 00577 } 00578 break; 00579 00580 case PARSE_SPACE_BEFORE_HEADER_VALUE: 00581 // parsing space character before a header's value 00582 if (*m_read_ptr == ' ') { 00583 m_headers_parse_state = PARSE_HEADER_VALUE; 00584 } else if (*m_read_ptr == '\r') { 00585 http_msg.addHeader(m_header_name, m_header_value); 00586 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00587 } else if (*m_read_ptr == '\n') { 00588 http_msg.addHeader(m_header_name, m_header_value); 00589 m_headers_parse_state = PARSE_EXPECTING_CR; 00590 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) { 00591 setError(ec, ERROR_HEADER_CHAR); 00592 return false; 00593 } else { 00594 // assume it is the first character for the value of a header 00595 m_header_value.push_back(*m_read_ptr); 00596 m_headers_parse_state = PARSE_HEADER_VALUE; 00597 } 00598 break; 00599 00600 case PARSE_HEADER_VALUE: 00601 // parsing the value of a header 00602 if (*m_read_ptr == '\r') { 00603 http_msg.addHeader(m_header_name, m_header_value); 00604 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00605 } else if (*m_read_ptr == '\n') { 00606 http_msg.addHeader(m_header_name, m_header_value); 00607 m_headers_parse_state = PARSE_EXPECTING_CR; 00608 } else if (isControl(*m_read_ptr)) { 00609 setError(ec, ERROR_HEADER_CHAR); 00610 return false; 00611 } else if (m_header_value.size() >= HEADER_VALUE_MAX) { 00612 setError(ec, ERROR_HEADER_VALUE_SIZE); 00613 return false; 00614 } else { 00615 // character (not first) for the value of a header 00616 m_header_value.push_back(*m_read_ptr); 00617 } 00618 break; 00619 00620 case PARSE_EXPECTING_FINAL_NEWLINE: 00621 if (*m_read_ptr == '\n') ++m_read_ptr; 00622 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00623 m_bytes_total_read += m_bytes_last_read; 00624 return true; 00625 00626 case PARSE_EXPECTING_FINAL_CR: 00627 if (*m_read_ptr == '\r') ++m_read_ptr; 00628 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00629 m_bytes_total_read += m_bytes_last_read; 00630 return true; 00631 } 00632 00633 ++m_read_ptr; 00634 } 00635 00636 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00637 m_bytes_total_read += m_bytes_last_read; 00638 return boost::indeterminate; 00639 } 00640 00641 void HTTPParser::updateMessageWithHeaderData(HTTPMessage& http_msg) const 00642 { 00643 if (isParsingRequest()) { 00644 00645 // finish an HTTP request message 00646 00647 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg)); 00648 http_request.setMethod(m_method); 00649 http_request.setResource(m_resource); 00650 http_request.setQueryString(m_query_string); 00651 00652 // parse query pairs from the URI query string 00653 if (! m_query_string.empty()) { 00654 if (! parseURLEncoded(http_request.getQueryParams(), 00655 m_query_string.c_str(), 00656 m_query_string.size())) 00657 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)"); 00658 } 00659 00660 // parse "Cookie" headers in request 00661 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator> 00662 cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE); 00663 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first; 00664 cookie_iterator != http_request.getHeaders().end() 00665 && cookie_iterator != cookie_pair.second; ++cookie_iterator) 00666 { 00667 if (! parseCookieHeader(http_request.getCookieParams(), 00668 cookie_iterator->second, false) ) 00669 PION_LOG_WARN(m_logger, "Cookie header parsing failed"); 00670 } 00671 00672 } else { 00673 00674 // finish an HTTP response message 00675 00676 HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg)); 00677 http_response.setStatusCode(m_status_code); 00678 http_response.setStatusMessage(m_status_message); 00679 00680 // parse "Set-Cookie" headers in response 00681 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator> 00682 cookie_pair = http_response.getHeaders().equal_range(HTTPTypes::HEADER_SET_COOKIE); 00683 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first; 00684 cookie_iterator != http_response.getHeaders().end() 00685 && cookie_iterator != cookie_pair.second; ++cookie_iterator) 00686 { 00687 if (! parseCookieHeader(http_response.getCookieParams(), 00688 cookie_iterator->second, true) ) 00689 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed"); 00690 } 00691 00692 } 00693 } 00694 00695 boost::tribool HTTPParser::finishHeaderParsing(HTTPMessage& http_msg, 00696 boost::system::error_code& ec) 00697 { 00698 boost::tribool rc = boost::indeterminate; 00699 00700 m_bytes_content_remaining = m_bytes_content_read = 0; 00701 http_msg.setContentLength(0); 00702 http_msg.updateTransferCodingUsingHeader(); 00703 updateMessageWithHeaderData(http_msg); 00704 00705 if (http_msg.isChunked()) { 00706 00707 // content is encoded using chunks 00708 m_message_parse_state = PARSE_CHUNKS; 00709 00710 // return true if parsing headers only 00711 if (m_parse_headers_only) 00712 rc = true; 00713 00714 } else if (http_msg.isContentLengthImplied()) { 00715 00716 // content length is implied to be zero 00717 m_message_parse_state = PARSE_END; 00718 rc = true; 00719 00720 } else { 00721 // content length should be specified in the headers 00722 00723 if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) { 00724 00725 // message has a content-length header 00726 try { 00727 http_msg.updateContentLengthUsingHeader(); 00728 } catch (...) { 00729 PION_LOG_ERROR(m_logger, "Unable to update content length"); 00730 setError(ec, ERROR_INVALID_CONTENT_LENGTH); 00731 return false; 00732 } 00733 00734 // check if content-length header == 0 00735 if (http_msg.getContentLength() == 0) { 00736 m_message_parse_state = PARSE_END; 00737 rc = true; 00738 } else { 00739 m_message_parse_state = PARSE_CONTENT; 00740 m_bytes_content_remaining = http_msg.getContentLength(); 00741 00742 // check if content-length exceeds maximum allowed 00743 if (m_bytes_content_remaining > m_max_content_length) 00744 http_msg.setContentLength(m_max_content_length); 00745 00746 // return true if parsing headers only 00747 if (m_parse_headers_only) 00748 rc = true; 00749 } 00750 00751 } else { 00752 // no content-length specified, and the content length cannot 00753 // otherwise be determined 00754 00755 // only if not a request, read through the close of the connection 00756 if (! m_is_request) { 00757 // clear the chunk buffers before we start 00758 http_msg.getChunkCache().clear(); 00759 00760 // continue reading content until there is no more data 00761 m_message_parse_state = PARSE_CONTENT_NO_LENGTH; 00762 00763 // return true if parsing headers only 00764 if (m_parse_headers_only) 00765 rc = true; 00766 } else { 00767 m_message_parse_state = PARSE_END; 00768 rc = true; 00769 } 00770 } 00771 } 00772 00773 // allocate a buffer for payload content (may be zero-size) 00774 http_msg.createContentBuffer(); 00775 00776 return rc; 00777 } 00778 00779 bool HTTPParser::parseURLEncoded(HTTPTypes::QueryParams& dict, 00780 const char *ptr, const size_t len) 00781 { 00782 // used to track whether we are parsing the name or value 00783 enum QueryParseState { 00784 QUERY_PARSE_NAME, QUERY_PARSE_VALUE 00785 } parse_state = QUERY_PARSE_NAME; 00786 00787 // misc other variables used for parsing 00788 const char * const end = ptr + len; 00789 std::string query_name; 00790 std::string query_value; 00791 00792 // iterate through each encoded character 00793 while (ptr < end) { 00794 switch (parse_state) { 00795 00796 case QUERY_PARSE_NAME: 00797 // parsing query name 00798 if (*ptr == '=') { 00799 // end of name found (OK if empty) 00800 parse_state = QUERY_PARSE_VALUE; 00801 } else if (*ptr == '&') { 00802 // if query name is empty, just skip it (i.e. "&&") 00803 if (! query_name.empty()) { 00804 // assume that "=" is missing -- it's OK if the value is empty 00805 dict.insert( std::make_pair(query_name, query_value) ); 00806 query_name.erase(); 00807 } 00808 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') { 00809 // ignore linefeeds, carriage return and tabs (normally within POST content) 00810 } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) { 00811 // control character detected, or max sized exceeded 00812 return false; 00813 } else { 00814 // character is part of the name 00815 query_name.push_back(*ptr); 00816 } 00817 break; 00818 00819 case QUERY_PARSE_VALUE: 00820 // parsing query value 00821 if (*ptr == '&') { 00822 // end of value found (OK if empty) 00823 if (! query_name.empty()) { 00824 dict.insert( std::make_pair(query_name, query_value) ); 00825 query_name.erase(); 00826 } 00827 query_value.erase(); 00828 parse_state = QUERY_PARSE_NAME; 00829 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') { 00830 // ignore linefeeds, carriage return and tabs (normally within POST content) 00831 } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) { 00832 // control character detected, or max sized exceeded 00833 return false; 00834 } else { 00835 // character is part of the value 00836 query_value.push_back(*ptr); 00837 } 00838 break; 00839 } 00840 00841 ++ptr; 00842 } 00843 00844 // handle last pair in string 00845 if (! query_name.empty()) 00846 dict.insert( std::make_pair(query_name, query_value) ); 00847 00848 return true; 00849 } 00850 00851 bool HTTPParser::parseCookieHeader(HTTPTypes::CookieParams& dict, 00852 const char *ptr, const size_t len, 00853 bool set_cookie_header) 00854 { 00855 // BASED ON RFC 2109 00856 // http://www.ietf.org/rfc/rfc2109.txt 00857 // 00858 // The current implementation ignores cookie attributes which begin with '$' 00859 // (i.e. $Path=/, $Domain=, etc.) 00860 00861 // used to track what we are parsing 00862 enum CookieParseState { 00863 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE 00864 } parse_state = COOKIE_PARSE_NAME; 00865 00866 // misc other variables used for parsing 00867 const char * const end = ptr + len; 00868 std::string cookie_name; 00869 std::string cookie_value; 00870 char value_quote_character = '\0'; 00871 00872 // iterate through each character 00873 while (ptr < end) { 00874 switch (parse_state) { 00875 00876 case COOKIE_PARSE_NAME: 00877 // parsing cookie name 00878 if (*ptr == '=') { 00879 // end of name found (OK if empty) 00880 value_quote_character = '\0'; 00881 parse_state = COOKIE_PARSE_VALUE; 00882 } else if (*ptr == ';' || *ptr == ',') { 00883 // ignore empty cookie names since this may occur naturally 00884 // when quoted values are encountered 00885 if (! cookie_name.empty()) { 00886 // value is empty (OK) 00887 if (! isCookieAttribute(cookie_name, set_cookie_header)) 00888 dict.insert( std::make_pair(cookie_name, cookie_value) ); 00889 cookie_name.erase(); 00890 } 00891 } else if (*ptr != ' ') { // ignore whitespace 00892 // check if control character detected, or max sized exceeded 00893 if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX) 00894 return false; 00895 // character is part of the name 00896 cookie_name.push_back(*ptr); 00897 } 00898 break; 00899 00900 case COOKIE_PARSE_VALUE: 00901 // parsing cookie value 00902 if (value_quote_character == '\0') { 00903 // value is not (yet) quoted 00904 if (*ptr == ';' || *ptr == ',') { 00905 // end of value found (OK if empty) 00906 if (! isCookieAttribute(cookie_name, set_cookie_header)) 00907 dict.insert( std::make_pair(cookie_name, cookie_value) ); 00908 cookie_name.erase(); 00909 cookie_value.erase(); 00910 parse_state = COOKIE_PARSE_NAME; 00911 } else if (*ptr == '\'' || *ptr == '"') { 00912 if (cookie_value.empty()) { 00913 // begin quoted value 00914 value_quote_character = *ptr; 00915 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) { 00916 // max size exceeded 00917 return false; 00918 } else { 00919 // assume character is part of the (unquoted) value 00920 cookie_value.push_back(*ptr); 00921 } 00922 } else if (*ptr != ' ' || !cookie_value.empty()) { // ignore leading unquoted whitespace 00923 // check if control character detected, or max sized exceeded 00924 if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX) 00925 return false; 00926 // character is part of the (unquoted) value 00927 cookie_value.push_back(*ptr); 00928 } 00929 } else { 00930 // value is quoted 00931 if (*ptr == value_quote_character) { 00932 // end of value found (OK if empty) 00933 if (! isCookieAttribute(cookie_name, set_cookie_header)) 00934 dict.insert( std::make_pair(cookie_name, cookie_value) ); 00935 cookie_name.erase(); 00936 cookie_value.erase(); 00937 parse_state = COOKIE_PARSE_IGNORE; 00938 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) { 00939 // max size exceeded 00940 return false; 00941 } else { 00942 // character is part of the (quoted) value 00943 cookie_value.push_back(*ptr); 00944 } 00945 } 00946 break; 00947 00948 case COOKIE_PARSE_IGNORE: 00949 // ignore everything until we reach a comma "," or semicolon ";" 00950 if (*ptr == ';' || *ptr == ',') 00951 parse_state = COOKIE_PARSE_NAME; 00952 break; 00953 } 00954 00955 ++ptr; 00956 } 00957 00958 // handle last cookie in string 00959 if (! isCookieAttribute(cookie_name, set_cookie_header)) 00960 dict.insert( std::make_pair(cookie_name, cookie_value) ); 00961 00962 return true; 00963 } 00964 00965 boost::tribool HTTPParser::parseChunks(HTTPMessage::ChunkCache& chunk_cache, 00966 boost::system::error_code& ec) 00967 { 00968 // 00969 // note that boost::tribool may have one of THREE states: 00970 // 00971 // false: encountered an error while parsing message 00972 // true: finished successfully parsing the message 00973 // indeterminate: parsed bytes, but the message is not yet finished 00974 // 00975 const char *read_start_ptr = m_read_ptr; 00976 m_bytes_last_read = 0; 00977 while (m_read_ptr < m_read_end_ptr) { 00978 00979 switch (m_chunked_content_parse_state) { 00980 case PARSE_CHUNK_SIZE_START: 00981 // we have not yet started parsing the next chunk size 00982 if (isHexDigit(*m_read_ptr)) { 00983 m_chunk_size_str.erase(); 00984 m_chunk_size_str.push_back(*m_read_ptr); 00985 m_chunked_content_parse_state = PARSE_CHUNK_SIZE; 00986 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') { 00987 // Ignore leading whitespace. Technically, the standard probably doesn't allow white space here, 00988 // but we'll be flexible, since there's no ambiguity. 00989 break; 00990 } else { 00991 setError(ec, ERROR_CHUNK_CHAR); 00992 return false; 00993 } 00994 break; 00995 00996 case PARSE_CHUNK_SIZE: 00997 if (isHexDigit(*m_read_ptr)) { 00998 m_chunk_size_str.push_back(*m_read_ptr); 00999 } else if (*m_read_ptr == '\x0D') { 01000 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE; 01001 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') { 01002 // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this, 01003 // but we'll be flexible, since there's no ambiguity. 01004 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE; 01005 } else { 01006 setError(ec, ERROR_CHUNK_CHAR); 01007 return false; 01008 } 01009 break; 01010 01011 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE: 01012 if (*m_read_ptr == '\x0D') { 01013 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE; 01014 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') { 01015 // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this, 01016 // but we'll be flexible, since there's no ambiguity. 01017 break; 01018 } else { 01019 setError(ec, ERROR_CHUNK_CHAR); 01020 return false; 01021 } 01022 break; 01023 01024 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE: 01025 // We received a CR; expecting LF to follow. We can't be flexible here because 01026 // if we see anything other than LF, we can't be certain where the chunk starts. 01027 if (*m_read_ptr == '\x0A') { 01028 m_bytes_read_in_current_chunk = 0; 01029 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16); 01030 if (m_size_of_current_chunk == 0) { 01031 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK; 01032 } else { 01033 m_chunked_content_parse_state = PARSE_CHUNK; 01034 } 01035 } else { 01036 setError(ec, ERROR_CHUNK_CHAR); 01037 return false; 01038 } 01039 break; 01040 01041 case PARSE_CHUNK: 01042 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) { 01043 if (chunk_cache.size() < m_max_content_length) 01044 chunk_cache.push_back(*m_read_ptr); 01045 m_bytes_read_in_current_chunk++; 01046 } 01047 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) { 01048 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK; 01049 } 01050 break; 01051 01052 case PARSE_EXPECTING_CR_AFTER_CHUNK: 01053 // we've read exactly m_size_of_current_chunk bytes since starting the current chunk 01054 if (*m_read_ptr == '\x0D') { 01055 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK; 01056 } else { 01057 setError(ec, ERROR_CHUNK_CHAR); 01058 return false; 01059 } 01060 break; 01061 01062 case PARSE_EXPECTING_LF_AFTER_CHUNK: 01063 // we received a CR; expecting LF to follow 01064 if (*m_read_ptr == '\x0A') { 01065 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START; 01066 } else { 01067 setError(ec, ERROR_CHUNK_CHAR); 01068 return false; 01069 } 01070 break; 01071 01072 case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK: 01073 // we've read the final chunk; expecting final CRLF 01074 if (*m_read_ptr == '\x0D') { 01075 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK; 01076 } else { 01077 setError(ec, ERROR_CHUNK_CHAR); 01078 return false; 01079 } 01080 break; 01081 01082 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK: 01083 // we received the final CR; expecting LF to follow 01084 if (*m_read_ptr == '\x0A') { 01085 ++m_read_ptr; 01086 m_bytes_last_read = (m_read_ptr - read_start_ptr); 01087 m_bytes_total_read += m_bytes_last_read; 01088 m_bytes_content_read += m_bytes_last_read; 01089 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete."); 01090 return true; 01091 } else { 01092 setError(ec, ERROR_CHUNK_CHAR); 01093 return false; 01094 } 01095 } 01096 01097 ++m_read_ptr; 01098 } 01099 01100 m_bytes_last_read = (m_read_ptr - read_start_ptr); 01101 m_bytes_total_read += m_bytes_last_read; 01102 m_bytes_content_read += m_bytes_last_read; 01103 return boost::indeterminate; 01104 } 01105 01106 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg, 01107 boost::system::error_code& ec) 01108 { 01109 size_t content_bytes_to_read; 01110 size_t content_bytes_available = bytes_available(); 01111 boost::tribool rc = boost::indeterminate; 01112 01113 if (m_bytes_content_remaining == 0) { 01114 // we have all of the remaining payload content 01115 return true; 01116 } else { 01117 if (content_bytes_available >= m_bytes_content_remaining) { 01118 // we have all of the remaining payload content 01119 rc = true; 01120 content_bytes_to_read = m_bytes_content_remaining; 01121 } else { 01122 // only some of the payload content is available 01123 content_bytes_to_read = content_bytes_available; 01124 } 01125 m_bytes_content_remaining -= content_bytes_to_read; 01126 } 01127 01128 // make sure content buffer is not already full 01129 if (m_bytes_content_read < m_max_content_length) { 01130 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) { 01131 // read would exceed maximum size for content buffer 01132 // copy only enough bytes to fill up the content buffer 01133 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, 01134 m_max_content_length - m_bytes_content_read); 01135 } else { 01136 // copy all bytes available 01137 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read); 01138 } 01139 } 01140 01141 m_read_ptr += content_bytes_to_read; 01142 m_bytes_content_read += content_bytes_to_read; 01143 m_bytes_total_read += content_bytes_to_read; 01144 m_bytes_last_read = content_bytes_to_read; 01145 01146 return rc; 01147 } 01148 01149 std::size_t HTTPParser::consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_cache) 01150 { 01151 if (bytes_available() == 0) { 01152 m_bytes_last_read = 0; 01153 } else { 01154 m_bytes_last_read = (m_read_end_ptr - m_read_ptr); 01155 while (m_read_ptr < m_read_end_ptr) { 01156 if (chunk_cache.size() < m_max_content_length) 01157 chunk_cache.push_back(*m_read_ptr); 01158 ++m_read_ptr; 01159 } 01160 m_bytes_total_read += m_bytes_last_read; 01161 m_bytes_content_read += m_bytes_last_read; 01162 } 01163 return m_bytes_last_read; 01164 } 01165 01166 void HTTPParser::finish(HTTPMessage& http_msg) const 01167 { 01168 switch (m_message_parse_state) { 01169 case PARSE_START: 01170 http_msg.setIsValid(false); 01171 http_msg.setContentLength(0); 01172 http_msg.createContentBuffer(); 01173 return; 01174 case PARSE_END: 01175 http_msg.setIsValid(true); 01176 break; 01177 case PARSE_HEADERS: 01178 http_msg.setIsValid(false); 01179 updateMessageWithHeaderData(http_msg); 01180 http_msg.setContentLength(0); 01181 http_msg.createContentBuffer(); 01182 break; 01183 case PARSE_CONTENT: 01184 http_msg.setIsValid(false); 01185 if (getContentBytesRead() < m_max_content_length) // NOTE: we can read more than we have allocated/stored 01186 http_msg.setContentLength(getContentBytesRead()); 01187 break; 01188 case PARSE_CHUNKS: 01189 http_msg.setIsValid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START); 01190 http_msg.concatenateChunks(); 01191 break; 01192 case PARSE_CONTENT_NO_LENGTH: 01193 http_msg.setIsValid(true); 01194 http_msg.concatenateChunks(); 01195 break; 01196 } 01197 01198 computeMsgStatus(http_msg, http_msg.isValid()); 01199 01200 if (isParsingRequest()) { 01201 // Parse query pairs from post content if content type is x-www-form-urlencoded. 01202 // Type could be followed by parameters (as defined in section 3.6 of RFC 2616) 01203 // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8 01204 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg)); 01205 const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE); 01206 if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(), 01207 HTTPTypes::CONTENT_TYPE_URLENCODED) == 0) 01208 { 01209 if (! parseURLEncoded(http_request.getQueryParams(), 01210 http_request.getContent(), 01211 http_request.getContentLength())) 01212 PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content)"); 01213 } 01214 } 01215 } 01216 01217 void HTTPParser::computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok ) 01218 { 01219 HTTPMessage::DataStatus st = HTTPMessage::STATUS_NONE; 01220 01221 if(http_msg.hasMissingPackets()) { 01222 st = http_msg.hasDataAfterMissingPackets() ? 01223 HTTPMessage::STATUS_PARTIAL : HTTPMessage::STATUS_TRUNCATED; 01224 } else { 01225 st = msg_parsed_ok ? HTTPMessage::STATUS_OK : HTTPMessage::STATUS_TRUNCATED; 01226 } 01227 01228 http_msg.setStatus(st); 01229 } 01230 01231 void HTTPParser::createErrorCategory(void) 01232 { 01233 static ErrorCategory UNIQUE_ERROR_CATEGORY; 01234 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY; 01235 } 01236 01237 bool HTTPParser::parseForwardedFor(const std::string& header, std::string& public_ip) 01238 { 01239 // static regex's used to check for ipv4 address 01240 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}"); 01241 01247 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}"); 01248 01249 // sanity check 01250 if (header.empty()) 01251 return false; 01252 01253 // local variables re-used by while loop 01254 boost::match_results<std::string::const_iterator> m; 01255 std::string::const_iterator start_it = header.begin(); 01256 01257 // search for next ip address within the header 01258 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) { 01259 // get ip that matched 01260 std::string ip_str(m[0].first, m[0].second); 01261 // check if public network ip address 01262 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) { 01263 // match found! 01264 public_ip = ip_str; 01265 return true; 01266 } 01267 // update search starting position 01268 start_it = m[0].second; 01269 } 01270 01271 // no matches found 01272 return false; 01273 } 01274 01275 } // end namespace net 01276 } // end namespace pion 01277