pion-net  4.0.9
net/include/pion/net/HTTPParser.hpp
00001 // ------------------------------------------------------------------
00002 // pion-net: a C++ framework for building lightweight HTTP interfaces
00003 // ------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #ifndef __PION_HTTPPARSER_HEADER__
00011 #define __PION_HTTPPARSER_HEADER__
00012 
00013 #include <string>
00014 #include <boost/noncopyable.hpp>
00015 #include <boost/logic/tribool.hpp>
00016 #include <boost/system/error_code.hpp>
00017 #include <boost/thread/once.hpp>
00018 #include <pion/PionConfig.hpp>
00019 #include <pion/PionLogger.hpp>
00020 #include <pion/net/HTTPMessage.hpp>
00021 
00022 
00023 namespace pion {    // begin namespace pion
00024 namespace net {     // begin namespace net (Pion Network Library)
00025 
00026 // forward declarations used for finishing HTTP messages
00027 class HTTPRequest;
00028 class HTTPResponse;
00029 
00033 class PION_NET_API HTTPParser :
00034     private boost::noncopyable
00035 {
00036 
00037 public:
00038 
00040     static const std::size_t        DEFAULT_CONTENT_MAX;
00041 
00043     enum ErrorValue {
00044         ERROR_METHOD_CHAR = 1,
00045         ERROR_METHOD_SIZE,
00046         ERROR_URI_CHAR,
00047         ERROR_URI_SIZE,
00048         ERROR_QUERY_CHAR,
00049         ERROR_QUERY_SIZE,
00050         ERROR_VERSION_EMPTY,
00051         ERROR_VERSION_CHAR,
00052         ERROR_STATUS_EMPTY,
00053         ERROR_STATUS_CHAR,
00054         ERROR_HEADER_CHAR,
00055         ERROR_HEADER_NAME_SIZE,
00056         ERROR_HEADER_VALUE_SIZE,
00057         ERROR_INVALID_CONTENT_LENGTH,
00058         ERROR_CHUNK_CHAR,
00059         ERROR_MISSING_CHUNK_DATA,
00060         ERROR_MISSING_HEADER_DATA,
00061         ERROR_MISSING_TOO_MUCH_CONTENT,
00062     };
00063     
00065     class ErrorCategory
00066         : public boost::system::error_category
00067     {
00068     public:
00069         const char *name() const { return "HTTPParser"; }
00070         std::string message(int ev) const {
00071             switch (ev) {
00072             case ERROR_METHOD_CHAR:
00073                 return "invalid method character";
00074             case ERROR_METHOD_SIZE:
00075                 return "method exceeds maximum size";
00076             case ERROR_URI_CHAR:
00077                 return "invalid URI character";
00078             case ERROR_URI_SIZE:
00079                 return "method exceeds maximum size";
00080             case ERROR_QUERY_CHAR:
00081                 return "invalid query string character";
00082             case ERROR_QUERY_SIZE:
00083                 return "query string exceeds maximum size";
00084             case ERROR_VERSION_EMPTY:
00085                 return "HTTP version undefined";
00086             case ERROR_VERSION_CHAR:
00087                 return "invalid version character";
00088             case ERROR_STATUS_EMPTY:
00089                 return "HTTP status undefined";
00090             case ERROR_STATUS_CHAR:
00091                 return "invalid status character";
00092             case ERROR_HEADER_CHAR:
00093                 return "invalid header character";
00094             case ERROR_HEADER_NAME_SIZE:
00095                 return "header name exceeds maximum size";
00096             case ERROR_HEADER_VALUE_SIZE:
00097                 return "header value exceeds maximum size";
00098             case ERROR_INVALID_CONTENT_LENGTH:
00099                 return "invalid Content-Length header";
00100             case ERROR_CHUNK_CHAR:
00101                 return "invalid chunk character";
00102             case ERROR_MISSING_HEADER_DATA:
00103                 return "missing header data";
00104             case ERROR_MISSING_CHUNK_DATA:
00105                 return "missing chunk data";
00106             case ERROR_MISSING_TOO_MUCH_CONTENT:
00107                 return "missing too much content";
00108             }
00109             return "HTTPParser error";
00110         }
00111     };
00112 
00120     HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX)
00121         : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request),
00122         m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START),
00123         m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H),
00124         m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0),
00125         m_bytes_content_remaining(0), m_bytes_content_read(0),
00126         m_bytes_last_read(0), m_bytes_total_read(0),
00127         m_max_content_length(max_content_length),
00128         m_parse_headers_only(false), m_save_raw_headers(false)
00129     {}
00130 
00132     virtual ~HTTPParser() {}
00133 
00145     boost::tribool parse(HTTPMessage& http_msg, boost::system::error_code& ec);
00146 
00159     boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len,
00160         boost::system::error_code& ec);
00161 
00167     void finish(HTTPMessage& http_msg) const;
00168 
00175     inline void setReadBuffer(const char *ptr, size_t len) {
00176         m_read_ptr = ptr;
00177         m_read_end_ptr = ptr + len;
00178     }
00179 
00186     inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const {
00187         read_ptr = m_read_ptr;
00188         read_end_ptr = m_read_end_ptr;
00189     }
00190 
00199     inline bool checkPrematureEOF(HTTPMessage& http_msg) {
00200         if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH)
00201             return true;
00202         m_message_parse_state = PARSE_END;
00203         http_msg.concatenateChunks();
00204         finish(http_msg);
00205         return false;
00206     }
00207 
00213     inline void parseHeadersOnly(bool b = true) { m_parse_headers_only = b; }
00214 
00220     inline void skipHeaderParsing(HTTPMessage& http_msg) {
00221         boost::system::error_code ec;
00222         finishHeaderParsing(http_msg, ec);
00223     }
00224     
00226     inline void reset(void) {
00227         m_message_parse_state = PARSE_START;
00228         m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H);
00229         m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
00230         m_status_code = 0;
00231         m_status_message.erase();
00232         m_method.erase();
00233         m_resource.erase();
00234         m_query_string.erase();
00235         m_raw_headers.erase();
00236         m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0;
00237     }
00238 
00240     inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; }
00241 
00243     inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); } 
00244 
00246     inline std::size_t gcount(void) const { return m_bytes_last_read; }
00247 
00249     inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; }
00250 
00252     inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; }
00253 
00255     inline std::size_t getMaxContentLength(void) const { return m_max_content_length; }
00256 
00258     inline const std::string& getRawHeaders(void) const { return m_raw_headers; }
00259 
00261     inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; }
00262 
00264     inline bool isParsingRequest(void) const { return m_is_request; }
00265 
00267     inline bool isParsingResponse(void) const { return ! m_is_request; }
00268 
00270     inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; }
00271 
00273     inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; }
00274 
00276     inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; }
00277 
00279     inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; }
00280 
00282     inline PionLogger getLogger(void) { return m_logger; }
00283 
00284 
00295     static bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00296                                 const char *ptr, const std::size_t len);
00297 
00309     static bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00310                                   const char *ptr, const std::size_t len,
00311                                   bool set_cookie_header);
00312 
00323     static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict,
00324         const std::string& cookie_header, bool set_cookie_header)
00325     {
00326         return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header);
00327     }
00328 
00338     static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict,
00339         const std::string& query)
00340     {
00341         return parseURLEncoded(dict, query.c_str(), query.size());
00342     }
00343 
00353     static bool parseForwardedFor(const std::string& header, std::string& public_ip);
00354     
00356     static inline ErrorCategory& getErrorCategory(void) {
00357         boost::call_once(HTTPParser::createErrorCategory, m_instance_flag);
00358         return *m_error_category_ptr;
00359     }
00360 
00361 
00362 protected:
00363 
00376     boost::tribool parseHeaders(HTTPMessage& http_msg, boost::system::error_code& ec);
00377 
00383     void updateMessageWithHeaderData(HTTPMessage& http_msg) const;
00384 
00397     boost::tribool finishHeaderParsing(HTTPMessage& http_msg,
00398         boost::system::error_code& ec);
00399 
00411     boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers,
00412         boost::system::error_code& ec);
00413 
00425     boost::tribool consumeContent(HTTPMessage& http_msg,
00426         boost::system::error_code& ec);
00427 
00435     std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers);
00436 
00442     static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok);
00443 
00450     static inline void setError(boost::system::error_code& ec, ErrorValue ev) {
00451         ec = boost::system::error_code(static_cast<int>(ev), getErrorCategory());
00452     }
00453 
00455     static void createErrorCategory(void);
00456 
00457 
00458     // misc functions used by the parsing functions
00459     inline static bool isChar(int c);
00460     inline static bool isControl(int c);
00461     inline static bool isSpecial(int c);
00462     inline static bool isDigit(int c);
00463     inline static bool isHexDigit(int c);
00464     inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header);
00465 
00466 
00468     static const boost::uint32_t        STATUS_MESSAGE_MAX;
00469 
00471     static const boost::uint32_t        METHOD_MAX;
00472 
00474     static const boost::uint32_t        RESOURCE_MAX;
00475 
00477     static const boost::uint32_t        QUERY_STRING_MAX;
00478 
00480     static const boost::uint32_t        HEADER_NAME_MAX;
00481 
00483     static const boost::uint32_t        HEADER_VALUE_MAX;
00484 
00486     static const boost::uint32_t        QUERY_NAME_MAX;
00487 
00489     static const boost::uint32_t        QUERY_VALUE_MAX;
00490 
00492     static const boost::uint32_t        COOKIE_NAME_MAX;
00493 
00495     static const boost::uint32_t        COOKIE_VALUE_MAX;
00496 
00497 
00499     mutable PionLogger                  m_logger;
00500 
00502     const bool                          m_is_request;
00503 
00505     const char *                        m_read_ptr;
00506 
00508     const char *                        m_read_end_ptr;
00509 
00510 
00511 private:
00512 
00514     enum MessageParseState {
00515         PARSE_START, PARSE_HEADERS, PARSE_CONTENT,
00516         PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END
00517     };
00518 
00521     enum HeadersParseState {
00522         PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY,
00523         PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2,
00524         PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH,
00525         PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR,
00526         PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR,
00527         PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE,
00528         PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR,
00529         PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME,
00530         PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE,
00531         PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR
00532     };
00533 
00536     enum ChunkedContentParseState {
00537         PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE, 
00538         PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE,
00539         PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK, 
00540         PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK,
00541         PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK, 
00542         PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK
00543     };
00544 
00545 
00547     MessageParseState                   m_message_parse_state;
00548 
00550     HeadersParseState                   m_headers_parse_state;
00551 
00553     ChunkedContentParseState            m_chunked_content_parse_state;
00554 
00556     boost::uint16_t                     m_status_code;
00557 
00559     std::string                         m_status_message;
00560 
00562     std::string                         m_method;
00563 
00565     std::string                         m_resource;
00566 
00568     std::string                         m_query_string;
00569 
00571     std::string                         m_raw_headers;
00572 
00574     std::string                         m_header_name;
00575 
00577     std::string                         m_header_value;
00578 
00580     std::string                         m_chunk_size_str;
00581 
00583     std::size_t                         m_size_of_current_chunk;
00584 
00586     std::size_t                         m_bytes_read_in_current_chunk;
00587 
00589     std::size_t                         m_bytes_content_remaining;
00590 
00592     std::size_t                         m_bytes_content_read;
00593 
00595     std::size_t                         m_bytes_last_read;
00596 
00598     std::size_t                         m_bytes_total_read;
00599 
00601     std::size_t                         m_max_content_length;
00602     
00604     bool                                m_parse_headers_only;
00605 
00607     bool                                m_save_raw_headers;
00608 
00610     static ErrorCategory *              m_error_category_ptr;
00611         
00613     static boost::once_flag             m_instance_flag;
00614 };
00615 
00616 
00617 // inline functions for HTTPParser
00618 
00619 inline bool HTTPParser::isChar(int c)
00620 {
00621     return(c >= 0 && c <= 127);
00622 }
00623 
00624 inline bool HTTPParser::isControl(int c)
00625 {
00626     return( (c >= 0 && c <= 31) || c == 127);
00627 }
00628 
00629 inline bool HTTPParser::isSpecial(int c)
00630 {
00631     switch (c) {
00632     case '(': case ')': case '<': case '>': case '@':
00633     case ',': case ';': case ':': case '\\': case '"':
00634     case '/': case '[': case ']': case '?': case '=':
00635     case '{': case '}': case ' ': case '\t':
00636         return true;
00637     default:
00638         return false;
00639     }
00640 }
00641 
00642 inline bool HTTPParser::isDigit(int c)
00643 {
00644     return(c >= '0' && c <= '9');
00645 }
00646 
00647 inline bool HTTPParser::isHexDigit(int c)
00648 {
00649     return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
00650 }
00651 
00652 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header)
00653 {
00654     return (name.empty() || name[0] == '$' || (set_cookie_header &&
00655         (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires")
00656         ) );
00657 }
00658 
00659 }   // end namespace net
00660 }   // end namespace pion
00661 
00662 #endif