pion-net  4.0.9
net/src/HTTPParser.cpp
00001 // ------------------------------------------------------------------
00002 // pion-net: a C++ framework for building lightweight HTTP interfaces
00003 // ------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #include <cstdlib>
00011 #include <boost/regex.hpp>
00012 #include <boost/logic/tribool.hpp>
00013 #include <pion/net/HTTPParser.hpp>
00014 #include <pion/net/HTTPRequest.hpp>
00015 #include <pion/net/HTTPResponse.hpp>
00016 #include <pion/net/HTTPMessage.hpp>
00017 
00018 
00019 namespace pion {    // begin namespace pion
00020 namespace net {     // begin namespace net (Pion Network Library)
00021 
00022 
00023 // static members of HTTPParser
00024 
00025 const boost::uint32_t   HTTPParser::STATUS_MESSAGE_MAX = 1024;  // 1 KB
00026 const boost::uint32_t   HTTPParser::METHOD_MAX = 1024;  // 1 KB
00027 const boost::uint32_t   HTTPParser::RESOURCE_MAX = 256 * 1024;  // 256 KB
00028 const boost::uint32_t   HTTPParser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB
00029 const boost::uint32_t   HTTPParser::HEADER_NAME_MAX = 1024; // 1 KB
00030 const boost::uint32_t   HTTPParser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB
00031 const boost::uint32_t   HTTPParser::QUERY_NAME_MAX = 1024;  // 1 KB
00032 const boost::uint32_t   HTTPParser::QUERY_VALUE_MAX = 1024 * 1024;  // 1 MB
00033 const boost::uint32_t   HTTPParser::COOKIE_NAME_MAX = 1024; // 1 KB
00034 const boost::uint32_t   HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB
00035 const std::size_t       HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024;  // 1 MB
00036 HTTPParser::ErrorCategory * HTTPParser::m_error_category_ptr = NULL;
00037 boost::once_flag            HTTPParser::m_instance_flag = BOOST_ONCE_INIT;
00038 
00039 
00040 // HTTPParser member functions
00041 
00042 boost::tribool HTTPParser::parse(HTTPMessage& http_msg,
00043     boost::system::error_code& ec)
00044 {
00045     PION_ASSERT(! eof() );
00046 
00047     boost::tribool rc = boost::indeterminate;
00048     std::size_t total_bytes_parsed = 0;
00049 
00050     if(http_msg.hasMissingPackets()) {
00051         http_msg.setDataAfterMissingPacket(true);
00052     }
00053 
00054     do {
00055         switch (m_message_parse_state) {
00056             // just started parsing the HTTP message
00057             case PARSE_START:
00058                 m_message_parse_state = PARSE_HEADERS;
00059                 // step through to PARSE_HEADERS
00060 
00061             // parsing the HTTP headers
00062             case PARSE_HEADERS:
00063                 rc = parseHeaders(http_msg, ec);
00064                 total_bytes_parsed += m_bytes_last_read;
00065                 // check if we have finished parsing HTTP headers
00066                 if (rc == true) {
00067                     // finishHeaderParsing() updates m_message_parse_state
00068                     rc = finishHeaderParsing(http_msg, ec);
00069                 }
00070                 break;
00071 
00072             // parsing chunked payload content
00073             case PARSE_CHUNKS:
00074                 rc = parseChunks(http_msg.getChunkCache(), ec);
00075                 total_bytes_parsed += m_bytes_last_read;
00076                 // check if we have finished parsing all chunks
00077                 if (rc == true) {
00078                     http_msg.concatenateChunks();
00079                 }
00080                 break;
00081 
00082             // parsing regular payload content with a known length
00083             case PARSE_CONTENT:
00084                 rc = consumeContent(http_msg, ec);
00085                 total_bytes_parsed += m_bytes_last_read;
00086                 break;
00087 
00088             // parsing payload content with no length (until EOF)
00089             case PARSE_CONTENT_NO_LENGTH:
00090                 consumeContentAsNextChunk(http_msg.getChunkCache());
00091                 total_bytes_parsed += m_bytes_last_read;
00092                 break;
00093 
00094             // finished parsing the HTTP message
00095             case PARSE_END:
00096                 rc = true;
00097                 break;
00098         }
00099     } while ( boost::indeterminate(rc) && ! eof() );
00100 
00101     // check if we've finished parsing the HTTP message
00102     if (rc == true) {
00103         m_message_parse_state = PARSE_END;
00104         finish(http_msg);
00105     } else if(rc == false) {
00106         computeMsgStatus(http_msg, false);
00107     }
00108 
00109     // update bytes last read (aggregate individual operations for caller)
00110     m_bytes_last_read = total_bytes_parsed;
00111 
00112     return rc;
00113 }
00114 
00115 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg,
00116     std::size_t len, boost::system::error_code& ec)
00117 {
00118     static const char MISSING_DATA_CHAR = 'X';
00119     boost::tribool rc = boost::indeterminate;
00120 
00121     http_msg.setMissingPackets(true);
00122 
00123     switch (m_message_parse_state) {
00124 
00125         // cannot recover from missing data while parsing HTTP headers
00126         case PARSE_START:
00127         case PARSE_HEADERS:
00128             setError(ec, ERROR_MISSING_HEADER_DATA);
00129             rc = false;
00130             break;
00131 
00132         // parsing chunked payload content
00133         case PARSE_CHUNKS:
00134             // parsing chunk data -> we can only recover if data fits into current chunk
00135             if (m_chunked_content_parse_state == PARSE_CHUNK
00136                 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00137                 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00138             {
00139                 // use dummy content for missing data
00140                 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 
00141                     http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00142 
00143                 m_bytes_read_in_current_chunk += len;
00144                 m_bytes_last_read = len;
00145                 m_bytes_total_read += len;
00146                 m_bytes_content_read += len;
00147 
00148                 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00149                     m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00150                 }
00151             } else {
00152                 // cannot recover from missing data
00153                 setError(ec, ERROR_MISSING_CHUNK_DATA);
00154                 rc = false;
00155             }
00156             break;
00157 
00158         // parsing regular payload content with a known length
00159         case PARSE_CONTENT:
00160             // parsing content (with length) -> we can only recover if data fits into content
00161             if (m_bytes_content_remaining == 0) {
00162                 // we have all of the remaining payload content
00163                 rc = true;
00164             } else if (m_bytes_content_remaining < len) {
00165                 // cannot recover from missing data
00166                 setError(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00167                 rc = false;
00168             } else {
00169 
00170                 // make sure content buffer is not already full
00171                 if ( (m_bytes_content_read+len) <= m_max_content_length) {
00172                     // use dummy content for missing data
00173                     for (std::size_t n = 0; n < len; ++n)
00174                         http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00175                 } else {
00176                     m_bytes_content_read += len;
00177                 }
00178 
00179                 m_bytes_content_remaining -= len;
00180                 m_bytes_total_read += len;
00181                 m_bytes_last_read = len;
00182 
00183                 if (m_bytes_content_remaining == 0)
00184                     rc = true;
00185             }
00186             break;
00187 
00188         // parsing payload content with no length (until EOF)
00189         case PARSE_CONTENT_NO_LENGTH:
00190             // use dummy content for missing data
00191             for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n) 
00192                 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00193             m_bytes_last_read = len;
00194             m_bytes_total_read += len;
00195             m_bytes_content_read += len;
00196             break;
00197 
00198         // finished parsing the HTTP message
00199         case PARSE_END:
00200             rc = true;
00201             break;
00202     }
00203 
00204     // check if we've finished parsing the HTTP message
00205     if (rc == true) {
00206         m_message_parse_state = PARSE_END;
00207         finish(http_msg);
00208     } else if(rc == false) {
00209         computeMsgStatus(http_msg, false);
00210     }
00211 
00212     return rc;
00213 }
00214 
00215 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg,
00216     boost::system::error_code& ec)
00217 {
00218     //
00219     // note that boost::tribool may have one of THREE states:
00220     //
00221     // false: encountered an error while parsing HTTP headers
00222     // true: finished successfully parsing the HTTP headers
00223     // indeterminate: parsed bytes, but the HTTP headers are not yet finished
00224     //
00225     const char *read_start_ptr = m_read_ptr;
00226     m_bytes_last_read = 0;
00227     while (m_read_ptr < m_read_end_ptr) {
00228 
00229         if (m_save_raw_headers)
00230             m_raw_headers += *m_read_ptr;
00231         
00232         switch (m_headers_parse_state) {
00233         case PARSE_METHOD_START:
00234             // we have not yet started parsing the HTTP method string
00235             if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace
00236                 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00237                     setError(ec, ERROR_METHOD_CHAR);
00238                     return false;
00239                 }
00240                 m_headers_parse_state = PARSE_METHOD;
00241                 m_method.erase();
00242                 m_method.push_back(*m_read_ptr);
00243             }
00244             break;
00245 
00246         case PARSE_METHOD:
00247             // we have started parsing the HTTP method string
00248             if (*m_read_ptr == ' ') {
00249                 m_resource.erase();
00250                 m_headers_parse_state = PARSE_URI_STEM;
00251             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00252                 setError(ec, ERROR_METHOD_CHAR);
00253                 return false;
00254             } else if (m_method.size() >= METHOD_MAX) {
00255                 setError(ec, ERROR_METHOD_SIZE);
00256                 return false;
00257             } else {
00258                 m_method.push_back(*m_read_ptr);
00259             }
00260             break;
00261 
00262         case PARSE_URI_STEM:
00263             // we have started parsing the URI stem (or resource name)
00264             if (*m_read_ptr == ' ') {
00265                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00266             } else if (*m_read_ptr == '?') {
00267                 m_query_string.erase();
00268                 m_headers_parse_state = PARSE_URI_QUERY;
00269             } else if (*m_read_ptr == '\r') {
00270                 http_msg.setVersionMajor(0);
00271                 http_msg.setVersionMinor(0);
00272                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00273             } else if (*m_read_ptr == '\n') {
00274                 http_msg.setVersionMajor(0);
00275                 http_msg.setVersionMinor(0);
00276                 m_headers_parse_state = PARSE_EXPECTING_CR;
00277             } else if (isControl(*m_read_ptr)) {
00278                 setError(ec, ERROR_URI_CHAR);
00279                 return false;
00280             } else if (m_resource.size() >= RESOURCE_MAX) {
00281                 setError(ec, ERROR_URI_SIZE);
00282                 return false;
00283             } else {
00284                 m_resource.push_back(*m_read_ptr);
00285             }
00286             break;
00287 
00288         case PARSE_URI_QUERY:
00289             // we have started parsing the URI query string
00290             if (*m_read_ptr == ' ') {
00291                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00292             } else if (isControl(*m_read_ptr)) {
00293                 setError(ec, ERROR_QUERY_CHAR);
00294                 return false;
00295             } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00296                 setError(ec, ERROR_QUERY_SIZE);
00297                 return false;
00298             } else {
00299                 m_query_string.push_back(*m_read_ptr);
00300             }
00301             break;
00302 
00303         case PARSE_HTTP_VERSION_H:
00304             // parsing "HTTP"
00305             if (*m_read_ptr == '\r') {
00306                 // should only happen for requests (no HTTP/VERSION specified)
00307                 if (! m_is_request) {
00308                     setError(ec, ERROR_VERSION_EMPTY);
00309                     return false;
00310                 }
00311                 http_msg.setVersionMajor(0);
00312                 http_msg.setVersionMinor(0);
00313                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00314             } else if (*m_read_ptr == '\n') {
00315                 // should only happen for requests (no HTTP/VERSION specified)
00316                 if (! m_is_request) {
00317                     setError(ec, ERROR_VERSION_EMPTY);
00318                     return false;
00319                 }
00320                 http_msg.setVersionMajor(0);
00321                 http_msg.setVersionMinor(0);
00322                 m_headers_parse_state = PARSE_EXPECTING_CR;
00323             } else if (*m_read_ptr != 'H') {
00324                 setError(ec, ERROR_VERSION_CHAR);
00325                 return false;
00326             }
00327             m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00328             break;
00329 
00330         case PARSE_HTTP_VERSION_T_1:
00331             // parsing "HTTP"
00332             if (*m_read_ptr != 'T') {
00333                 setError(ec, ERROR_VERSION_CHAR);
00334                 return false;
00335             }
00336             m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00337             break;
00338 
00339         case PARSE_HTTP_VERSION_T_2:
00340             // parsing "HTTP"
00341             if (*m_read_ptr != 'T') {
00342                 setError(ec, ERROR_VERSION_CHAR);
00343                 return false;
00344             }
00345             m_headers_parse_state = PARSE_HTTP_VERSION_P;
00346             break;
00347 
00348         case PARSE_HTTP_VERSION_P:
00349             // parsing "HTTP"
00350             if (*m_read_ptr != 'P') {
00351                 setError(ec, ERROR_VERSION_CHAR);
00352                 return false;
00353             }
00354             m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00355             break;
00356 
00357         case PARSE_HTTP_VERSION_SLASH:
00358             // parsing slash after "HTTP"
00359             if (*m_read_ptr != '/') {
00360                 setError(ec, ERROR_VERSION_CHAR);
00361                 return false;
00362             }
00363             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00364             break;
00365 
00366         case PARSE_HTTP_VERSION_MAJOR_START:
00367             // parsing the first digit of the major version number
00368             if (!isDigit(*m_read_ptr)) {
00369                 setError(ec, ERROR_VERSION_CHAR);
00370                 return false;
00371             }
00372             http_msg.setVersionMajor(*m_read_ptr - '0');
00373             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00374             break;
00375 
00376         case PARSE_HTTP_VERSION_MAJOR:
00377             // parsing the major version number (not first digit)
00378             if (*m_read_ptr == '.') {
00379                 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00380             } else if (isDigit(*m_read_ptr)) {
00381                 http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10)
00382                                           + (*m_read_ptr - '0') );
00383             } else {
00384                 setError(ec, ERROR_VERSION_CHAR);
00385                 return false;
00386             }
00387             break;
00388 
00389         case PARSE_HTTP_VERSION_MINOR_START:
00390             // parsing the first digit of the minor version number
00391             if (!isDigit(*m_read_ptr)) {
00392                 setError(ec, ERROR_VERSION_CHAR);
00393                 return false;
00394             }
00395             http_msg.setVersionMinor(*m_read_ptr - '0');
00396             m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00397             break;
00398 
00399         case PARSE_HTTP_VERSION_MINOR:
00400             // parsing the major version number (not first digit)
00401             if (*m_read_ptr == ' ') {
00402                 // ignore trailing spaces after version in request
00403                 if (! m_is_request) {
00404                     m_headers_parse_state = PARSE_STATUS_CODE_START;
00405                 }
00406             } else if (*m_read_ptr == '\r') {
00407                 // should only happen for requests
00408                 if (! m_is_request) {
00409                     setError(ec, ERROR_STATUS_EMPTY);
00410                     return false;
00411                 }
00412                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00413             } else if (*m_read_ptr == '\n') {
00414                 // should only happen for requests
00415                 if (! m_is_request) {
00416                     setError(ec, ERROR_STATUS_EMPTY);
00417                     return false;
00418                 }
00419                 m_headers_parse_state = PARSE_EXPECTING_CR;
00420             } else if (isDigit(*m_read_ptr)) {
00421                 http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10)
00422                                           + (*m_read_ptr - '0') );
00423             } else {
00424                 setError(ec, ERROR_VERSION_CHAR);
00425                 return false;
00426             }
00427             break;
00428 
00429         case PARSE_STATUS_CODE_START:
00430             // parsing the first digit of the response status code
00431             if (!isDigit(*m_read_ptr)) {
00432                 setError(ec, ERROR_STATUS_CHAR);
00433                 return false;
00434             }
00435             m_status_code = (*m_read_ptr - '0');
00436             m_headers_parse_state = PARSE_STATUS_CODE;
00437             break;
00438 
00439         case PARSE_STATUS_CODE:
00440             // parsing the response status code (not first digit)
00441             if (*m_read_ptr == ' ') {
00442                 m_status_message.erase();
00443                 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00444             } else if (isDigit(*m_read_ptr)) {
00445                 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00446             } else if (*m_read_ptr == '\r') {
00447                 // recover from status message not sent
00448                 m_status_message.erase();
00449                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00450             } else if (*m_read_ptr == '\n') {
00451                 // recover from status message not sent
00452                 m_status_message.erase();
00453                 m_headers_parse_state = PARSE_EXPECTING_CR;
00454             } else {
00455                 setError(ec, ERROR_STATUS_CHAR);
00456                 return false;
00457             }
00458             break;
00459 
00460         case PARSE_STATUS_MESSAGE:
00461             // parsing the response status message
00462             if (*m_read_ptr == '\r') {
00463                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00464             } else if (*m_read_ptr == '\n') {
00465                 m_headers_parse_state = PARSE_EXPECTING_CR;
00466             } else if (isControl(*m_read_ptr)) {
00467                 setError(ec, ERROR_STATUS_CHAR);
00468                 return false;
00469             } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00470                 setError(ec, ERROR_STATUS_CHAR);
00471                 return false;
00472             } else {
00473                 m_status_message.push_back(*m_read_ptr);
00474             }
00475             break;
00476 
00477         case PARSE_EXPECTING_NEWLINE:
00478             // we received a CR; expecting a newline to follow
00479             if (*m_read_ptr == '\n') {
00480                 m_headers_parse_state = PARSE_HEADER_START;
00481             } else if (*m_read_ptr == '\r') {
00482                 // we received two CR's in a row
00483                 // assume CR only is (incorrectly) being used for line termination
00484                 // therefore, the message is finished
00485                 ++m_read_ptr;
00486                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00487                 m_bytes_total_read += m_bytes_last_read;
00488                 return true;
00489             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00490                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00491             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00492                 setError(ec, ERROR_HEADER_CHAR);
00493                 return false;
00494             } else {
00495                 // assume it is the first character for the name of a header
00496                 m_header_name.erase();
00497                 m_header_name.push_back(*m_read_ptr);
00498                 m_headers_parse_state = PARSE_HEADER_NAME;
00499             }
00500             break;
00501 
00502         case PARSE_EXPECTING_CR:
00503             // we received a newline without a CR
00504             if (*m_read_ptr == '\r') {
00505                 m_headers_parse_state = PARSE_HEADER_START;
00506             } else if (*m_read_ptr == '\n') {
00507                 // we received two newlines in a row
00508                 // assume newline only is (incorrectly) being used for line termination
00509                 // therefore, the message is finished
00510                 ++m_read_ptr;
00511                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00512                 m_bytes_total_read += m_bytes_last_read;
00513                 return true;
00514             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00515                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00516             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00517                 setError(ec, ERROR_HEADER_CHAR);
00518                 return false;
00519             } else {
00520                 // assume it is the first character for the name of a header
00521                 m_header_name.erase();
00522                 m_header_name.push_back(*m_read_ptr);
00523                 m_headers_parse_state = PARSE_HEADER_NAME;
00524             }
00525             break;
00526 
00527         case PARSE_HEADER_WHITESPACE:
00528             // parsing whitespace before a header name
00529             if (*m_read_ptr == '\r') {
00530                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00531             } else if (*m_read_ptr == '\n') {
00532                 m_headers_parse_state = PARSE_EXPECTING_CR;
00533             } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00534                 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00535                     setError(ec, ERROR_HEADER_CHAR);
00536                     return false;
00537                 // assume it is the first character for the name of a header
00538                 m_header_name.erase();
00539                 m_header_name.push_back(*m_read_ptr);
00540                 m_headers_parse_state = PARSE_HEADER_NAME;
00541             }
00542             break;
00543 
00544         case PARSE_HEADER_START:
00545             // parsing the start of a new header
00546             if (*m_read_ptr == '\r') {
00547                 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00548             } else if (*m_read_ptr == '\n') {
00549                 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00550             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00551                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00552             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00553                 setError(ec, ERROR_HEADER_CHAR);
00554                 return false;
00555             } else {
00556                 // first character for the name of a header
00557                 m_header_name.erase();
00558                 m_header_name.push_back(*m_read_ptr);
00559                 m_headers_parse_state = PARSE_HEADER_NAME;
00560             }
00561             break;
00562 
00563         case PARSE_HEADER_NAME:
00564             // parsing the name of a header
00565             if (*m_read_ptr == ':') {
00566                 m_header_value.erase();
00567                 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00568             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00569                 setError(ec, ERROR_HEADER_CHAR);
00570                 return false;
00571             } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00572                 setError(ec, ERROR_HEADER_NAME_SIZE);
00573                 return false;
00574             } else {
00575                 // character (not first) for the name of a header
00576                 m_header_name.push_back(*m_read_ptr);
00577             }
00578             break;
00579 
00580         case PARSE_SPACE_BEFORE_HEADER_VALUE:
00581             // parsing space character before a header's value
00582             if (*m_read_ptr == ' ') {
00583                 m_headers_parse_state = PARSE_HEADER_VALUE;
00584             } else if (*m_read_ptr == '\r') {
00585                 http_msg.addHeader(m_header_name, m_header_value);
00586                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00587             } else if (*m_read_ptr == '\n') {
00588                 http_msg.addHeader(m_header_name, m_header_value);
00589                 m_headers_parse_state = PARSE_EXPECTING_CR;
00590             } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00591                 setError(ec, ERROR_HEADER_CHAR);
00592                 return false;
00593             } else {
00594                 // assume it is the first character for the value of a header
00595                 m_header_value.push_back(*m_read_ptr);
00596                 m_headers_parse_state = PARSE_HEADER_VALUE;
00597             }
00598             break;
00599 
00600         case PARSE_HEADER_VALUE:
00601             // parsing the value of a header
00602             if (*m_read_ptr == '\r') {
00603                 http_msg.addHeader(m_header_name, m_header_value);
00604                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00605             } else if (*m_read_ptr == '\n') {
00606                 http_msg.addHeader(m_header_name, m_header_value);
00607                 m_headers_parse_state = PARSE_EXPECTING_CR;
00608             } else if (isControl(*m_read_ptr)) {
00609                 setError(ec, ERROR_HEADER_CHAR);
00610                 return false;
00611             } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00612                 setError(ec, ERROR_HEADER_VALUE_SIZE);
00613                 return false;
00614             } else {
00615                 // character (not first) for the value of a header
00616                 m_header_value.push_back(*m_read_ptr);
00617             }
00618             break;
00619 
00620         case PARSE_EXPECTING_FINAL_NEWLINE:
00621             if (*m_read_ptr == '\n') ++m_read_ptr;
00622             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00623             m_bytes_total_read += m_bytes_last_read;
00624             return true;
00625 
00626         case PARSE_EXPECTING_FINAL_CR:
00627             if (*m_read_ptr == '\r') ++m_read_ptr;
00628             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00629             m_bytes_total_read += m_bytes_last_read;
00630             return true;
00631         }
00632         
00633         ++m_read_ptr;
00634     }
00635 
00636     m_bytes_last_read = (m_read_ptr - read_start_ptr);
00637     m_bytes_total_read += m_bytes_last_read;
00638     return boost::indeterminate;
00639 }
00640 
00641 void HTTPParser::updateMessageWithHeaderData(HTTPMessage& http_msg) const
00642 {
00643     if (isParsingRequest()) {
00644 
00645         // finish an HTTP request message
00646 
00647         HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
00648         http_request.setMethod(m_method);
00649         http_request.setResource(m_resource);
00650         http_request.setQueryString(m_query_string);
00651 
00652         // parse query pairs from the URI query string
00653         if (! m_query_string.empty()) {
00654             if (! parseURLEncoded(http_request.getQueryParams(),
00655                                   m_query_string.c_str(),
00656                                   m_query_string.size())) 
00657                 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00658         }
00659 
00660         // parse "Cookie" headers in request
00661         std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00662         cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE);
00663         for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00664              cookie_iterator != http_request.getHeaders().end()
00665              && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00666         {
00667             if (! parseCookieHeader(http_request.getCookieParams(),
00668                                     cookie_iterator->second, false) )
00669                 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00670         }
00671 
00672     } else {
00673 
00674         // finish an HTTP response message
00675 
00676         HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg));
00677         http_response.setStatusCode(m_status_code);
00678         http_response.setStatusMessage(m_status_message);
00679 
00680         // parse "Set-Cookie" headers in response
00681         std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00682         cookie_pair = http_response.getHeaders().equal_range(HTTPTypes::HEADER_SET_COOKIE);
00683         for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00684              cookie_iterator != http_response.getHeaders().end()
00685              && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00686         {
00687             if (! parseCookieHeader(http_response.getCookieParams(),
00688                                     cookie_iterator->second, true) )
00689                 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00690         }
00691 
00692     }
00693 }
00694 
00695 boost::tribool HTTPParser::finishHeaderParsing(HTTPMessage& http_msg,
00696     boost::system::error_code& ec)
00697 {
00698     boost::tribool rc = boost::indeterminate;
00699 
00700     m_bytes_content_remaining = m_bytes_content_read = 0;
00701     http_msg.setContentLength(0);
00702     http_msg.updateTransferCodingUsingHeader();
00703     updateMessageWithHeaderData(http_msg);
00704 
00705     if (http_msg.isChunked()) {
00706 
00707         // content is encoded using chunks
00708         m_message_parse_state = PARSE_CHUNKS;
00709         
00710         // return true if parsing headers only
00711         if (m_parse_headers_only)
00712             rc = true;
00713 
00714     } else if (http_msg.isContentLengthImplied()) {
00715 
00716         // content length is implied to be zero
00717         m_message_parse_state = PARSE_END;
00718         rc = true;
00719 
00720     } else {
00721         // content length should be specified in the headers
00722 
00723         if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) {
00724 
00725             // message has a content-length header
00726             try {
00727                 http_msg.updateContentLengthUsingHeader();
00728             } catch (...) {
00729                 PION_LOG_ERROR(m_logger, "Unable to update content length");
00730                 setError(ec, ERROR_INVALID_CONTENT_LENGTH);
00731                 return false;
00732             }
00733 
00734             // check if content-length header == 0
00735             if (http_msg.getContentLength() == 0) {
00736                 m_message_parse_state = PARSE_END;
00737                 rc = true;
00738             } else {
00739                 m_message_parse_state = PARSE_CONTENT;
00740                 m_bytes_content_remaining = http_msg.getContentLength();
00741 
00742                 // check if content-length exceeds maximum allowed
00743                 if (m_bytes_content_remaining > m_max_content_length)
00744                     http_msg.setContentLength(m_max_content_length);
00745 
00746                 // return true if parsing headers only
00747                 if (m_parse_headers_only)
00748                     rc = true;
00749             }
00750 
00751         } else {
00752             // no content-length specified, and the content length cannot 
00753             // otherwise be determined
00754 
00755             // only if not a request, read through the close of the connection
00756             if (! m_is_request) {
00757                 // clear the chunk buffers before we start
00758                 http_msg.getChunkCache().clear();
00759 
00760                 // continue reading content until there is no more data
00761                 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00762 
00763                 // return true if parsing headers only
00764                 if (m_parse_headers_only)
00765                     rc = true;
00766             } else {
00767                 m_message_parse_state = PARSE_END;
00768                 rc = true;
00769             }
00770         }
00771     }
00772 
00773     // allocate a buffer for payload content (may be zero-size)
00774     http_msg.createContentBuffer();
00775 
00776     return rc;
00777 }
00778 
00779 bool HTTPParser::parseURLEncoded(HTTPTypes::QueryParams& dict,
00780                                  const char *ptr, const size_t len)
00781 {
00782     // used to track whether we are parsing the name or value
00783     enum QueryParseState {
00784         QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00785     } parse_state = QUERY_PARSE_NAME;
00786 
00787     // misc other variables used for parsing
00788     const char * const end = ptr + len;
00789     std::string query_name;
00790     std::string query_value;
00791 
00792     // iterate through each encoded character
00793     while (ptr < end) {
00794         switch (parse_state) {
00795 
00796         case QUERY_PARSE_NAME:
00797             // parsing query name
00798             if (*ptr == '=') {
00799                 // end of name found (OK if empty)
00800                 parse_state = QUERY_PARSE_VALUE;
00801             } else if (*ptr == '&') {
00802                 // if query name is empty, just skip it (i.e. "&&")
00803                 if (! query_name.empty()) {
00804                     // assume that "=" is missing -- it's OK if the value is empty
00805                     dict.insert( std::make_pair(query_name, query_value) );
00806                     query_name.erase();
00807                 }
00808             } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00809                 // ignore linefeeds, carriage return and tabs (normally within POST content)
00810             } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00811                 // control character detected, or max sized exceeded
00812                 return false;
00813             } else {
00814                 // character is part of the name
00815                 query_name.push_back(*ptr);
00816             }
00817             break;
00818 
00819         case QUERY_PARSE_VALUE:
00820             // parsing query value
00821             if (*ptr == '&') {
00822                 // end of value found (OK if empty)
00823                 if (! query_name.empty()) {
00824                     dict.insert( std::make_pair(query_name, query_value) );
00825                     query_name.erase();
00826                 }
00827                 query_value.erase();
00828                 parse_state = QUERY_PARSE_NAME;
00829             } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00830                 // ignore linefeeds, carriage return and tabs (normally within POST content)
00831             } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00832                 // control character detected, or max sized exceeded
00833                 return false;
00834             } else {
00835                 // character is part of the value
00836                 query_value.push_back(*ptr);
00837             }
00838             break;
00839         }
00840 
00841         ++ptr;
00842     }
00843 
00844     // handle last pair in string
00845     if (! query_name.empty())
00846         dict.insert( std::make_pair(query_name, query_value) );
00847 
00848     return true;
00849 }
00850 
00851 bool HTTPParser::parseCookieHeader(HTTPTypes::CookieParams& dict,
00852                                    const char *ptr, const size_t len,
00853                                    bool set_cookie_header)
00854 {
00855     // BASED ON RFC 2109
00856     // http://www.ietf.org/rfc/rfc2109.txt
00857     // 
00858     // The current implementation ignores cookie attributes which begin with '$'
00859     // (i.e. $Path=/, $Domain=, etc.)
00860 
00861     // used to track what we are parsing
00862     enum CookieParseState {
00863         COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
00864     } parse_state = COOKIE_PARSE_NAME;
00865 
00866     // misc other variables used for parsing
00867     const char * const end = ptr + len;
00868     std::string cookie_name;
00869     std::string cookie_value;
00870     char value_quote_character = '\0';
00871 
00872     // iterate through each character
00873     while (ptr < end) {
00874         switch (parse_state) {
00875 
00876         case COOKIE_PARSE_NAME:
00877             // parsing cookie name
00878             if (*ptr == '=') {
00879                 // end of name found (OK if empty)
00880                 value_quote_character = '\0';
00881                 parse_state = COOKIE_PARSE_VALUE;
00882             } else if (*ptr == ';' || *ptr == ',') {
00883                 // ignore empty cookie names since this may occur naturally
00884                 // when quoted values are encountered
00885                 if (! cookie_name.empty()) {
00886                     // value is empty (OK)
00887                     if (! isCookieAttribute(cookie_name, set_cookie_header))
00888                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00889                     cookie_name.erase();
00890                 }
00891             } else if (*ptr != ' ') {   // ignore whitespace
00892                 // check if control character detected, or max sized exceeded
00893                 if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
00894                     return false;
00895                 // character is part of the name
00896                 cookie_name.push_back(*ptr);
00897             }
00898             break;
00899 
00900         case COOKIE_PARSE_VALUE:
00901             // parsing cookie value
00902             if (value_quote_character == '\0') {
00903                 // value is not (yet) quoted
00904                 if (*ptr == ';' || *ptr == ',') {
00905                     // end of value found (OK if empty)
00906                     if (! isCookieAttribute(cookie_name, set_cookie_header))
00907                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00908                     cookie_name.erase();
00909                     cookie_value.erase();
00910                     parse_state = COOKIE_PARSE_NAME;
00911                 } else if (*ptr == '\'' || *ptr == '"') {
00912                     if (cookie_value.empty()) {
00913                         // begin quoted value
00914                         value_quote_character = *ptr;
00915                     } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00916                         // max size exceeded
00917                         return false;
00918                     } else {
00919                         // assume character is part of the (unquoted) value
00920                         cookie_value.push_back(*ptr);
00921                     }
00922                 } else if (*ptr != ' ' || !cookie_value.empty()) {  // ignore leading unquoted whitespace
00923                     // check if control character detected, or max sized exceeded
00924                     if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
00925                         return false;
00926                     // character is part of the (unquoted) value
00927                     cookie_value.push_back(*ptr);
00928                 }
00929             } else {
00930                 // value is quoted
00931                 if (*ptr == value_quote_character) {
00932                     // end of value found (OK if empty)
00933                     if (! isCookieAttribute(cookie_name, set_cookie_header))
00934                         dict.insert( std::make_pair(cookie_name, cookie_value) );
00935                     cookie_name.erase();
00936                     cookie_value.erase();
00937                     parse_state = COOKIE_PARSE_IGNORE;
00938                 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00939                     // max size exceeded
00940                     return false;
00941                 } else {
00942                     // character is part of the (quoted) value
00943                     cookie_value.push_back(*ptr);
00944                 }
00945             }
00946             break;
00947 
00948         case COOKIE_PARSE_IGNORE:
00949             // ignore everything until we reach a comma "," or semicolon ";"
00950             if (*ptr == ';' || *ptr == ',')
00951                 parse_state = COOKIE_PARSE_NAME;
00952             break;
00953         }
00954 
00955         ++ptr;
00956     }
00957 
00958     // handle last cookie in string
00959     if (! isCookieAttribute(cookie_name, set_cookie_header))
00960         dict.insert( std::make_pair(cookie_name, cookie_value) );
00961 
00962     return true;
00963 }
00964 
00965 boost::tribool HTTPParser::parseChunks(HTTPMessage::ChunkCache& chunk_cache,
00966     boost::system::error_code& ec)
00967 {
00968     //
00969     // note that boost::tribool may have one of THREE states:
00970     //
00971     // false: encountered an error while parsing message
00972     // true: finished successfully parsing the message
00973     // indeterminate: parsed bytes, but the message is not yet finished
00974     //
00975     const char *read_start_ptr = m_read_ptr;
00976     m_bytes_last_read = 0;
00977     while (m_read_ptr < m_read_end_ptr) {
00978 
00979         switch (m_chunked_content_parse_state) {
00980         case PARSE_CHUNK_SIZE_START:
00981             // we have not yet started parsing the next chunk size
00982             if (isHexDigit(*m_read_ptr)) {
00983                 m_chunk_size_str.erase();
00984                 m_chunk_size_str.push_back(*m_read_ptr);
00985                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
00986             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
00987                 // Ignore leading whitespace.  Technically, the standard probably doesn't allow white space here, 
00988                 // but we'll be flexible, since there's no ambiguity.
00989                 break;
00990             } else {
00991                 setError(ec, ERROR_CHUNK_CHAR);
00992                 return false;
00993             }
00994             break;
00995 
00996         case PARSE_CHUNK_SIZE:
00997             if (isHexDigit(*m_read_ptr)) {
00998                 m_chunk_size_str.push_back(*m_read_ptr);
00999             } else if (*m_read_ptr == '\x0D') {
01000                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01001             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01002                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
01003                 // but we'll be flexible, since there's no ambiguity.
01004                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01005             } else {
01006                 setError(ec, ERROR_CHUNK_CHAR);
01007                 return false;
01008             }
01009             break;
01010 
01011         case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01012             if (*m_read_ptr == '\x0D') {
01013                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01014             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01015                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
01016                 // but we'll be flexible, since there's no ambiguity.
01017                 break;
01018             } else {
01019                 setError(ec, ERROR_CHUNK_CHAR);
01020                 return false;
01021             }
01022             break;
01023 
01024         case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01025             // We received a CR; expecting LF to follow.  We can't be flexible here because 
01026             // if we see anything other than LF, we can't be certain where the chunk starts.
01027             if (*m_read_ptr == '\x0A') {
01028                 m_bytes_read_in_current_chunk = 0;
01029                 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01030                 if (m_size_of_current_chunk == 0) {
01031                     m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK;
01032                 } else {
01033                     m_chunked_content_parse_state = PARSE_CHUNK;
01034                 }
01035             } else {
01036                 setError(ec, ERROR_CHUNK_CHAR);
01037                 return false;
01038             }
01039             break;
01040 
01041         case PARSE_CHUNK:
01042             if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01043                 if (chunk_cache.size() < m_max_content_length)
01044                     chunk_cache.push_back(*m_read_ptr);
01045                 m_bytes_read_in_current_chunk++;
01046             }
01047             if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01048                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01049             }
01050             break;
01051 
01052         case PARSE_EXPECTING_CR_AFTER_CHUNK:
01053             // we've read exactly m_size_of_current_chunk bytes since starting the current chunk
01054             if (*m_read_ptr == '\x0D') {
01055                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01056             } else {
01057                 setError(ec, ERROR_CHUNK_CHAR);
01058                 return false;
01059             }
01060             break;
01061 
01062         case PARSE_EXPECTING_LF_AFTER_CHUNK:
01063             // we received a CR; expecting LF to follow
01064             if (*m_read_ptr == '\x0A') {
01065                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01066             } else {
01067                 setError(ec, ERROR_CHUNK_CHAR);
01068                 return false;
01069             }
01070             break;
01071 
01072         case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK:
01073             // we've read the final chunk; expecting final CRLF
01074             if (*m_read_ptr == '\x0D') {
01075                 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01076             } else {
01077                 setError(ec, ERROR_CHUNK_CHAR);
01078                 return false;
01079             }
01080             break;
01081 
01082         case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01083             // we received the final CR; expecting LF to follow
01084             if (*m_read_ptr == '\x0A') {
01085                 ++m_read_ptr;
01086                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01087                 m_bytes_total_read += m_bytes_last_read;
01088                 m_bytes_content_read += m_bytes_last_read;
01089                 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01090                 return true;
01091             } else {
01092                 setError(ec, ERROR_CHUNK_CHAR);
01093                 return false;
01094             }
01095         }
01096 
01097         ++m_read_ptr;
01098     }
01099 
01100     m_bytes_last_read = (m_read_ptr - read_start_ptr);
01101     m_bytes_total_read += m_bytes_last_read;
01102     m_bytes_content_read += m_bytes_last_read;
01103     return boost::indeterminate;
01104 }
01105 
01106 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg,
01107     boost::system::error_code& ec)
01108 {
01109     size_t content_bytes_to_read;
01110     size_t content_bytes_available = bytes_available();
01111     boost::tribool rc = boost::indeterminate;
01112 
01113     if (m_bytes_content_remaining == 0) {
01114         // we have all of the remaining payload content
01115         return true;
01116     } else {
01117         if (content_bytes_available >= m_bytes_content_remaining) {
01118             // we have all of the remaining payload content
01119             rc = true;
01120             content_bytes_to_read = m_bytes_content_remaining;
01121         } else {
01122             // only some of the payload content is available
01123             content_bytes_to_read = content_bytes_available;
01124         }
01125         m_bytes_content_remaining -= content_bytes_to_read;
01126     }
01127 
01128     // make sure content buffer is not already full
01129     if (m_bytes_content_read < m_max_content_length) {
01130         if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01131             // read would exceed maximum size for content buffer
01132             // copy only enough bytes to fill up the content buffer
01133             memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, 
01134                 m_max_content_length - m_bytes_content_read);
01135         } else {
01136             // copy all bytes available
01137             memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01138         }
01139     }
01140 
01141     m_read_ptr += content_bytes_to_read;
01142     m_bytes_content_read += content_bytes_to_read;
01143     m_bytes_total_read += content_bytes_to_read;
01144     m_bytes_last_read = content_bytes_to_read;
01145 
01146     return rc;
01147 }
01148 
01149 std::size_t HTTPParser::consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_cache)
01150 {
01151     if (bytes_available() == 0) {
01152         m_bytes_last_read = 0;
01153     } else {
01154         m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01155         while (m_read_ptr < m_read_end_ptr) {
01156             if (chunk_cache.size() < m_max_content_length)
01157                 chunk_cache.push_back(*m_read_ptr);
01158             ++m_read_ptr;
01159         }
01160         m_bytes_total_read += m_bytes_last_read;
01161         m_bytes_content_read += m_bytes_last_read;
01162     }
01163     return m_bytes_last_read;
01164 }
01165 
01166 void HTTPParser::finish(HTTPMessage& http_msg) const
01167 {
01168     switch (m_message_parse_state) {
01169     case PARSE_START:
01170         http_msg.setIsValid(false);
01171         http_msg.setContentLength(0);
01172         http_msg.createContentBuffer();
01173         return;
01174     case PARSE_END:
01175         http_msg.setIsValid(true);
01176         break;
01177     case PARSE_HEADERS:
01178         http_msg.setIsValid(false);
01179         updateMessageWithHeaderData(http_msg);
01180         http_msg.setContentLength(0);
01181         http_msg.createContentBuffer();
01182         break;
01183     case PARSE_CONTENT:
01184         http_msg.setIsValid(false);
01185         if (getContentBytesRead() < m_max_content_length)   // NOTE: we can read more than we have allocated/stored
01186             http_msg.setContentLength(getContentBytesRead());
01187         break;
01188     case PARSE_CHUNKS:
01189         http_msg.setIsValid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01190         http_msg.concatenateChunks();
01191         break;
01192     case PARSE_CONTENT_NO_LENGTH:
01193         http_msg.setIsValid(true);
01194         http_msg.concatenateChunks();
01195         break;
01196     }
01197 
01198     computeMsgStatus(http_msg, http_msg.isValid());
01199 
01200     if (isParsingRequest()) {
01201         // Parse query pairs from post content if content type is x-www-form-urlencoded.
01202         // Type could be followed by parameters (as defined in section 3.6 of RFC 2616)
01203         // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8
01204         HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
01205         const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE);
01206         if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(),
01207                                         HTTPTypes::CONTENT_TYPE_URLENCODED) == 0)
01208         {
01209             if (! parseURLEncoded(http_request.getQueryParams(),
01210                                   http_request.getContent(),
01211                                   http_request.getContentLength())) 
01212                 PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content)");
01213         }
01214     }
01215 }
01216 
01217 void HTTPParser::computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok )
01218 {
01219     HTTPMessage::DataStatus st = HTTPMessage::STATUS_NONE;
01220 
01221     if(http_msg.hasMissingPackets()) {
01222         st = http_msg.hasDataAfterMissingPackets() ?
01223                         HTTPMessage::STATUS_PARTIAL : HTTPMessage::STATUS_TRUNCATED;
01224     } else {
01225         st = msg_parsed_ok ? HTTPMessage::STATUS_OK : HTTPMessage::STATUS_TRUNCATED;
01226     }
01227 
01228     http_msg.setStatus(st);
01229 }
01230 
01231 void HTTPParser::createErrorCategory(void)
01232 {
01233     static ErrorCategory UNIQUE_ERROR_CATEGORY;
01234     m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01235 }
01236 
01237 bool HTTPParser::parseForwardedFor(const std::string& header, std::string& public_ip)
01238 {
01239     // static regex's used to check for ipv4 address
01240     static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01241 
01247     static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01248 
01249     // sanity check
01250     if (header.empty())
01251         return false;
01252 
01253     // local variables re-used by while loop
01254     boost::match_results<std::string::const_iterator> m;
01255     std::string::const_iterator start_it = header.begin();
01256 
01257     // search for next ip address within the header
01258     while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01259         // get ip that matched
01260         std::string ip_str(m[0].first, m[0].second);
01261         // check if public network ip address
01262         if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01263             // match found!
01264             public_ip = ip_str;
01265             return true;
01266         }
01267         // update search starting position
01268         start_it = m[0].second;
01269     }
01270 
01271     // no matches found
01272     return false;
01273 }
01274 
01275 }   // end namespace net
01276 }   // end namespace pion
01277