pion-net
4.0.9
|
00001 // ------------------------------------------------------------------ 00002 // pion-net: a C++ framework for building lightweight HTTP interfaces 00003 // ------------------------------------------------------------------ 00004 // Copyright (C) 2007-2008 Atomic Labs, Inc. (http://www.atomiclabs.com) 00005 // 00006 // Distributed under the Boost Software License, Version 1.0. 00007 // See http://www.boost.org/LICENSE_1_0.txt 00008 // 00009 00010 #ifndef __PION_HTTPPARSER_HEADER__ 00011 #define __PION_HTTPPARSER_HEADER__ 00012 00013 #include <string> 00014 #include <boost/noncopyable.hpp> 00015 #include <boost/logic/tribool.hpp> 00016 #include <boost/system/error_code.hpp> 00017 #include <boost/thread/once.hpp> 00018 #include <pion/PionConfig.hpp> 00019 #include <pion/PionLogger.hpp> 00020 #include <pion/net/HTTPMessage.hpp> 00021 00022 00023 namespace pion { // begin namespace pion 00024 namespace net { // begin namespace net (Pion Network Library) 00025 00026 // forward declarations used for finishing HTTP messages 00027 class HTTPRequest; 00028 class HTTPResponse; 00029 00033 class PION_NET_API HTTPParser : 00034 private boost::noncopyable 00035 { 00036 00037 public: 00038 00040 static const std::size_t DEFAULT_CONTENT_MAX; 00041 00043 enum ErrorValue { 00044 ERROR_METHOD_CHAR = 1, 00045 ERROR_METHOD_SIZE, 00046 ERROR_URI_CHAR, 00047 ERROR_URI_SIZE, 00048 ERROR_QUERY_CHAR, 00049 ERROR_QUERY_SIZE, 00050 ERROR_VERSION_EMPTY, 00051 ERROR_VERSION_CHAR, 00052 ERROR_STATUS_EMPTY, 00053 ERROR_STATUS_CHAR, 00054 ERROR_HEADER_CHAR, 00055 ERROR_HEADER_NAME_SIZE, 00056 ERROR_HEADER_VALUE_SIZE, 00057 ERROR_INVALID_CONTENT_LENGTH, 00058 ERROR_CHUNK_CHAR, 00059 ERROR_MISSING_CHUNK_DATA, 00060 ERROR_MISSING_HEADER_DATA, 00061 ERROR_MISSING_TOO_MUCH_CONTENT, 00062 }; 00063 00065 class ErrorCategory 00066 : public boost::system::error_category 00067 { 00068 public: 00069 const char *name() const { return "HTTPParser"; } 00070 std::string message(int ev) const { 00071 switch (ev) { 00072 case ERROR_METHOD_CHAR: 00073 return "invalid method character"; 00074 case ERROR_METHOD_SIZE: 00075 return "method exceeds maximum size"; 00076 case ERROR_URI_CHAR: 00077 return "invalid URI character"; 00078 case ERROR_URI_SIZE: 00079 return "method exceeds maximum size"; 00080 case ERROR_QUERY_CHAR: 00081 return "invalid query string character"; 00082 case ERROR_QUERY_SIZE: 00083 return "query string exceeds maximum size"; 00084 case ERROR_VERSION_EMPTY: 00085 return "HTTP version undefined"; 00086 case ERROR_VERSION_CHAR: 00087 return "invalid version character"; 00088 case ERROR_STATUS_EMPTY: 00089 return "HTTP status undefined"; 00090 case ERROR_STATUS_CHAR: 00091 return "invalid status character"; 00092 case ERROR_HEADER_CHAR: 00093 return "invalid header character"; 00094 case ERROR_HEADER_NAME_SIZE: 00095 return "header name exceeds maximum size"; 00096 case ERROR_HEADER_VALUE_SIZE: 00097 return "header value exceeds maximum size"; 00098 case ERROR_INVALID_CONTENT_LENGTH: 00099 return "invalid Content-Length header"; 00100 case ERROR_CHUNK_CHAR: 00101 return "invalid chunk character"; 00102 case ERROR_MISSING_HEADER_DATA: 00103 return "missing header data"; 00104 case ERROR_MISSING_CHUNK_DATA: 00105 return "missing chunk data"; 00106 case ERROR_MISSING_TOO_MUCH_CONTENT: 00107 return "missing too much content"; 00108 } 00109 return "HTTPParser error"; 00110 } 00111 }; 00112 00120 HTTPParser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX) 00121 : m_logger(PION_GET_LOGGER("pion.net.HTTPParser")), m_is_request(is_request), 00122 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START), 00123 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H), 00124 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0), 00125 m_bytes_content_remaining(0), m_bytes_content_read(0), 00126 m_bytes_last_read(0), m_bytes_total_read(0), 00127 m_max_content_length(max_content_length), 00128 m_parse_headers_only(false), m_save_raw_headers(false) 00129 {} 00130 00132 virtual ~HTTPParser() {} 00133 00145 boost::tribool parse(HTTPMessage& http_msg, boost::system::error_code& ec); 00146 00159 boost::tribool parseMissingData(HTTPMessage& http_msg, std::size_t len, 00160 boost::system::error_code& ec); 00161 00167 void finish(HTTPMessage& http_msg) const; 00168 00175 inline void setReadBuffer(const char *ptr, size_t len) { 00176 m_read_ptr = ptr; 00177 m_read_end_ptr = ptr + len; 00178 } 00179 00186 inline void loadReadPosition(const char *&read_ptr, const char *&read_end_ptr) const { 00187 read_ptr = m_read_ptr; 00188 read_end_ptr = m_read_end_ptr; 00189 } 00190 00199 inline bool checkPrematureEOF(HTTPMessage& http_msg) { 00200 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH) 00201 return true; 00202 m_message_parse_state = PARSE_END; 00203 http_msg.concatenateChunks(); 00204 finish(http_msg); 00205 return false; 00206 } 00207 00213 inline void parseHeadersOnly(bool b = true) { m_parse_headers_only = b; } 00214 00220 inline void skipHeaderParsing(HTTPMessage& http_msg) { 00221 boost::system::error_code ec; 00222 finishHeaderParsing(http_msg, ec); 00223 } 00224 00226 inline void reset(void) { 00227 m_message_parse_state = PARSE_START; 00228 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H); 00229 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START; 00230 m_status_code = 0; 00231 m_status_message.erase(); 00232 m_method.erase(); 00233 m_resource.erase(); 00234 m_query_string.erase(); 00235 m_raw_headers.erase(); 00236 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0; 00237 } 00238 00240 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; } 00241 00243 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); } 00244 00246 inline std::size_t gcount(void) const { return m_bytes_last_read; } 00247 00249 inline std::size_t getTotalBytesRead(void) const { return m_bytes_total_read; } 00250 00252 inline std::size_t getContentBytesRead(void) const { return m_bytes_content_read; } 00253 00255 inline std::size_t getMaxContentLength(void) const { return m_max_content_length; } 00256 00258 inline const std::string& getRawHeaders(void) const { return m_raw_headers; } 00259 00261 inline bool getSaveRawHeaders(void) const { return m_save_raw_headers; } 00262 00264 inline bool isParsingRequest(void) const { return m_is_request; } 00265 00267 inline bool isParsingResponse(void) const { return ! m_is_request; } 00268 00270 inline void setMaxContentLength(std::size_t n) { m_max_content_length = n; } 00271 00273 inline void resetMaxContentLength(void) { m_max_content_length = DEFAULT_CONTENT_MAX; } 00274 00276 inline void setSaveRawHeaders(bool b) { m_save_raw_headers = b; } 00277 00279 inline void setLogger(PionLogger log_ptr) { m_logger = log_ptr; } 00280 00282 inline PionLogger getLogger(void) { return m_logger; } 00283 00284 00295 static bool parseURLEncoded(HTTPTypes::QueryParams& dict, 00296 const char *ptr, const std::size_t len); 00297 00309 static bool parseCookieHeader(HTTPTypes::CookieParams& dict, 00310 const char *ptr, const std::size_t len, 00311 bool set_cookie_header); 00312 00323 static inline bool parseCookieHeader(HTTPTypes::CookieParams& dict, 00324 const std::string& cookie_header, bool set_cookie_header) 00325 { 00326 return parseCookieHeader(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header); 00327 } 00328 00338 static inline bool parseURLEncoded(HTTPTypes::QueryParams& dict, 00339 const std::string& query) 00340 { 00341 return parseURLEncoded(dict, query.c_str(), query.size()); 00342 } 00343 00353 static bool parseForwardedFor(const std::string& header, std::string& public_ip); 00354 00356 static inline ErrorCategory& getErrorCategory(void) { 00357 boost::call_once(HTTPParser::createErrorCategory, m_instance_flag); 00358 return *m_error_category_ptr; 00359 } 00360 00361 00362 protected: 00363 00376 boost::tribool parseHeaders(HTTPMessage& http_msg, boost::system::error_code& ec); 00377 00383 void updateMessageWithHeaderData(HTTPMessage& http_msg) const; 00384 00397 boost::tribool finishHeaderParsing(HTTPMessage& http_msg, 00398 boost::system::error_code& ec); 00399 00411 boost::tribool parseChunks(HTTPMessage::ChunkCache& chunk_buffers, 00412 boost::system::error_code& ec); 00413 00425 boost::tribool consumeContent(HTTPMessage& http_msg, 00426 boost::system::error_code& ec); 00427 00435 std::size_t consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_buffers); 00436 00442 static void computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok); 00443 00450 static inline void setError(boost::system::error_code& ec, ErrorValue ev) { 00451 ec = boost::system::error_code(static_cast<int>(ev), getErrorCategory()); 00452 } 00453 00455 static void createErrorCategory(void); 00456 00457 00458 // misc functions used by the parsing functions 00459 inline static bool isChar(int c); 00460 inline static bool isControl(int c); 00461 inline static bool isSpecial(int c); 00462 inline static bool isDigit(int c); 00463 inline static bool isHexDigit(int c); 00464 inline static bool isCookieAttribute(const std::string& name, bool set_cookie_header); 00465 00466 00468 static const boost::uint32_t STATUS_MESSAGE_MAX; 00469 00471 static const boost::uint32_t METHOD_MAX; 00472 00474 static const boost::uint32_t RESOURCE_MAX; 00475 00477 static const boost::uint32_t QUERY_STRING_MAX; 00478 00480 static const boost::uint32_t HEADER_NAME_MAX; 00481 00483 static const boost::uint32_t HEADER_VALUE_MAX; 00484 00486 static const boost::uint32_t QUERY_NAME_MAX; 00487 00489 static const boost::uint32_t QUERY_VALUE_MAX; 00490 00492 static const boost::uint32_t COOKIE_NAME_MAX; 00493 00495 static const boost::uint32_t COOKIE_VALUE_MAX; 00496 00497 00499 mutable PionLogger m_logger; 00500 00502 const bool m_is_request; 00503 00505 const char * m_read_ptr; 00506 00508 const char * m_read_end_ptr; 00509 00510 00511 private: 00512 00514 enum MessageParseState { 00515 PARSE_START, PARSE_HEADERS, PARSE_CONTENT, 00516 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END 00517 }; 00518 00521 enum HeadersParseState { 00522 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY, 00523 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2, 00524 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH, 00525 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR, 00526 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR, 00527 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE, 00528 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR, 00529 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME, 00530 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE, 00531 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR 00532 }; 00533 00536 enum ChunkedContentParseState { 00537 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE, 00538 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE, 00539 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK, 00540 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK, 00541 PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK, 00542 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK 00543 }; 00544 00545 00547 MessageParseState m_message_parse_state; 00548 00550 HeadersParseState m_headers_parse_state; 00551 00553 ChunkedContentParseState m_chunked_content_parse_state; 00554 00556 boost::uint16_t m_status_code; 00557 00559 std::string m_status_message; 00560 00562 std::string m_method; 00563 00565 std::string m_resource; 00566 00568 std::string m_query_string; 00569 00571 std::string m_raw_headers; 00572 00574 std::string m_header_name; 00575 00577 std::string m_header_value; 00578 00580 std::string m_chunk_size_str; 00581 00583 std::size_t m_size_of_current_chunk; 00584 00586 std::size_t m_bytes_read_in_current_chunk; 00587 00589 std::size_t m_bytes_content_remaining; 00590 00592 std::size_t m_bytes_content_read; 00593 00595 std::size_t m_bytes_last_read; 00596 00598 std::size_t m_bytes_total_read; 00599 00601 std::size_t m_max_content_length; 00602 00604 bool m_parse_headers_only; 00605 00607 bool m_save_raw_headers; 00608 00610 static ErrorCategory * m_error_category_ptr; 00611 00613 static boost::once_flag m_instance_flag; 00614 }; 00615 00616 00617 // inline functions for HTTPParser 00618 00619 inline bool HTTPParser::isChar(int c) 00620 { 00621 return(c >= 0 && c <= 127); 00622 } 00623 00624 inline bool HTTPParser::isControl(int c) 00625 { 00626 return( (c >= 0 && c <= 31) || c == 127); 00627 } 00628 00629 inline bool HTTPParser::isSpecial(int c) 00630 { 00631 switch (c) { 00632 case '(': case ')': case '<': case '>': case '@': 00633 case ',': case ';': case ':': case '\\': case '"': 00634 case '/': case '[': case ']': case '?': case '=': 00635 case '{': case '}': case ' ': case '\t': 00636 return true; 00637 default: 00638 return false; 00639 } 00640 } 00641 00642 inline bool HTTPParser::isDigit(int c) 00643 { 00644 return(c >= '0' && c <= '9'); 00645 } 00646 00647 inline bool HTTPParser::isHexDigit(int c) 00648 { 00649 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); 00650 } 00651 00652 inline bool HTTPParser::isCookieAttribute(const std::string& name, bool set_cookie_header) 00653 { 00654 return (name.empty() || name[0] == '$' || (set_cookie_header && 00655 (name=="Comment" || name=="Domain" || name=="Max-Age" || name=="Path" || name=="Secure" || name=="Version" || name=="Expires") 00656 ) ); 00657 } 00658 00659 } // end namespace net 00660 } // end namespace pion 00661 00662 #endif