reader.h
1 #ifndef RAPIDJSON_READER_H_
2 #define RAPIDJSON_READER_H_
3 
4 // Copyright (c) 2011 Milo Yip (miloyip@gmail.com)
5 // Version 0.1
6 
7 #include "rapidjson.h"
8 #include "encodings.h"
9 #include "internal/pow10.h"
10 #include "internal/stack.h"
11 
12 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
13 #include <intrin.h>
14 #pragma intrinsic(_BitScanForward)
15 #endif
16 #ifdef RAPIDJSON_SSE42
17 #include <nmmintrin.h>
18 #elif defined(RAPIDJSON_SSE2)
19 #include <emmintrin.h>
20 #endif
21 
22 #ifdef _MSC_VER
23 RAPIDJSON_DIAG_PUSH
24 RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
25 RAPIDJSON_DIAG_OFF(4702) // unreachable code
26 #endif
27 
28 #define RAPIDJSON_NOTHING /* deliberately empty */
29 #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
30 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
31  RAPIDJSON_MULTILINEMACRO_BEGIN \
32  if (HasParseError()) { return value; } \
33  RAPIDJSON_MULTILINEMACRO_END
34 #endif
35 #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
36  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
37 
38 #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
39 #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
40  RAPIDJSON_MULTILINEMACRO_BEGIN \
41  RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
42  parseResult_.Set(parseErrorCode,offset); \
43  RAPIDJSON_MULTILINEMACRO_END
44 #endif
45 
46 #ifndef RAPIDJSON_PARSE_ERROR
47 #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
48  RAPIDJSON_MULTILINEMACRO_BEGIN \
49  RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
50  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
51  RAPIDJSON_MULTILINEMACRO_END
52 #endif
53 
54 #include "error/error.h" // ParseErrorCode, ParseResult
55 
56 namespace rapidjson {
57 
58 ///////////////////////////////////////////////////////////////////////////////
59 // ParseFlag
60 
//! Bit flags selecting parser behaviour; values may be OR-ed together.
/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
 */
enum ParseFlag {
    kParseDefaultFlags         = 0,       //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
    kParseInsituFlag           = 1 << 0,  //!< In-situ (destructive) parsing.
    kParseValidateEncodingFlag = 1 << 1,  //!< Validate encoding of JSON strings.
    kParseIterativeFlag        = 1 << 2,  //!< Iterative (constant complexity in terms of function call stack size) parsing.
    kParseStopWhenDoneFlag     = 1 << 3   //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
};
71 
72 ///////////////////////////////////////////////////////////////////////////////
73 // Handler
74 
75 /*! \class rapidjson::Handler
76  \brief Concept for receiving events from GenericReader upon parsing.
77  The functions return true if no error occurs. If they return false,
78  the event publisher should terminate the process.
79 \code
80 concept Handler {
81  typename Ch;
82 
83  bool Null();
84  bool Bool(bool b);
85  bool Int(int i);
86  bool Uint(unsigned i);
87  bool Int64(int64_t i);
88  bool Uint64(uint64_t i);
89  bool Double(double d);
90  bool String(const Ch* str, SizeType length, bool copy);
91  bool StartObject();
92  bool EndObject(SizeType memberCount);
93  bool StartArray();
94  bool EndArray(SizeType elementCount);
95 };
96 \endcode
97 */
98 ///////////////////////////////////////////////////////////////////////////////
99 // BaseReaderHandler
100 
101 //! Default implementation of Handler.
102 /*! This can be used as base class of any reader handler.
103  \note implements Handler concept
104 */
105 template<typename Encoding = UTF8<> >
107  typedef typename Encoding::Ch Ch;
108 
109  bool Default() { return true; }
110  bool Null() { return Default(); }
111  bool Bool(bool) { return Default(); }
112  bool Int(int) { return Default(); }
113  bool Uint(unsigned) { return Default(); }
114  bool Int64(int64_t) { return Default(); }
115  bool Uint64(uint64_t) { return Default(); }
116  bool Double(double) { return Default(); }
117  bool String(const Ch*, SizeType, bool) { return Default(); }
118  bool StartObject() { return Default(); }
119  bool EndObject(SizeType) { return Default(); }
120  bool StartArray() { return Default(); }
121  bool EndArray(SizeType) { return Default(); }
122 };
123 
124 ///////////////////////////////////////////////////////////////////////////////
125 // StreamLocalCopy
126 
127 namespace internal {
128 
129 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
130 class StreamLocalCopy;
131 
132 //! Do copy optimization.
133 template<typename Stream>
134 class StreamLocalCopy<Stream, 1> {
135 public:
136  StreamLocalCopy(Stream& original) : s(original), original_(original) {}
137  ~StreamLocalCopy() { original_ = s; }
138 
139  Stream s;
140 
141 private:
142  StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
143 
144  Stream& original_;
145 };
146 
147 //! Keep reference.
148 template<typename Stream>
149 class StreamLocalCopy<Stream, 0> {
150 public:
151  StreamLocalCopy(Stream& original) : s(original) {}
152 
153  Stream& s;
154 
155 private:
156  StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
157 };
158 
159 } // namespace internal
160 
161 ///////////////////////////////////////////////////////////////////////////////
162 // SkipWhitespace
163 
164 //! Skip the JSON white spaces in a stream.
165 /*! \param is A input stream for skipping white spaces.
166  \note This function has SSE2/SSE4.2 specialization.
167 */
168 template<typename InputStream>
169 void SkipWhitespace(InputStream& is) {
170  internal::StreamLocalCopy<InputStream> copy(is);
171  InputStream& s(copy.s);
172 
173  while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
174  s.Take();
175 }
176 
177 #ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-bit characters at once.
/*! \param p Pointer to the first character to examine.
    \return Pointer to the first character at or after \c p that is not
            ' ', '\n', '\r' or '\t'. The scan only stops at such a character
            (a '\0' terminator counts as one).
    \note Whole 16-byte aligned blocks are loaded, starting at the aligned
          address at or below \c p.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Character set for pcmpistrm: four whitespace characters, zero-padded to 16 bytes.
    static const char whitespace[16] = { ' ', '\n', '\r', '\t' };

    // 16-byte align to the lower boundary
    const char* ap = reinterpret_cast<const char*>(reinterpret_cast<size_t>(p) & ~15);

    // Test first unaligned characters
    // Cannot make use of _mm_cmpistrm() because it stops when encounters '\0' before p
    if (ap != p) {
        // Broadcast each whitespace character to all 16 lanes. Building the
        // vectors with _mm_set1_epi8 guarantees every lane holds the character,
        // so no lane can compare against '\0' by accident.
        const __m128i w0 = _mm_set1_epi8(' ');
        const __m128i w1 = _mm_set1_epi8('\n');
        const __m128i w2 = _mm_set1_epi8('\r');
        const __m128i w3 = _mm_set1_epi8('\t');

        const unsigned char shift = static_cast<unsigned char>(reinterpret_cast<size_t>(p) & 15);
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Bit i set <=> byte i of the block is NOT whitespace.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        r = static_cast<unsigned short>(r >> shift << shift);   // Clear results before p
        if (r != 0) {
#ifdef _MSC_VER     // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return ap + offset;
#else
            return ap + __builtin_ffs(r) - 1;
#endif
        }
        ap += 16;
    }

    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&whitespace[0]));

    // The rest of string
    for (;; ap += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
        const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
        if (r != 0) {   // some of characters is non-whitespace
#ifdef _MSC_VER     // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return ap + offset;
#else
            return ap + __builtin_ffs(r) - 1;
#endif
        }
    }
}
235 
236 #elif defined(RAPIDJSON_SSE2)
237 
//! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
/*! \param p Pointer to the first character to examine.
    \return Pointer to the first character at or after \c p that is not
            ' ', '\n', '\r' or '\t'. The scan only stops at such a character
            (a '\0' terminator counts as one).
    \note Whole 16-byte aligned blocks are loaded, starting at the aligned
          address at or below \c p.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Broadcast each whitespace character to all 16 lanes. Building the
    // vectors with _mm_set1_epi8 guarantees every lane holds the character,
    // so no lane can compare against '\0' by accident.
    const __m128i w0 = _mm_set1_epi8(' ');
    const __m128i w1 = _mm_set1_epi8('\n');
    const __m128i w2 = _mm_set1_epi8('\r');
    const __m128i w3 = _mm_set1_epi8('\t');

    // 16-byte align to the lower boundary
    const char* ap = reinterpret_cast<const char*>(reinterpret_cast<size_t>(p) & ~15);

    // Test first unaligned characters
    if (ap != p) {
        const unsigned char shift = static_cast<unsigned char>(reinterpret_cast<size_t>(p) & 15);
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Bit i set <=> byte i of the block is NOT whitespace.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        r = static_cast<unsigned short>(r >> shift << shift);   // Clear results before p
        if (r != 0) {
#ifdef _MSC_VER     // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return ap + offset;
#else
            return ap + __builtin_ffs(r) - 1;
#endif
        }
        ap += 16;
    }

    // The rest of string
    for (;; ap += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i*>(ap));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        const unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER     // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return ap + offset;
#else
            return ap + __builtin_ffs(r) - 1;
#endif
        }
    }
}
295 
296 #endif // RAPIDJSON_SSE2
297 
298 #ifdef RAPIDJSON_SIMD
299 //! Template function specialization for InsituStringStream
300 template<> inline void SkipWhitespace(InsituStringStream& is) {
301  is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
302 }
303 
304 //! Template function specialization for StringStream
305 template<> inline void SkipWhitespace(StringStream& is) {
306  is.src_ = SkipWhitespace_SIMD(is.src_);
307 }
308 #endif // RAPIDJSON_SIMD
309 
310 ///////////////////////////////////////////////////////////////////////////////
311 // GenericReader
312 
313 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
314 /*! GenericReader parses JSON text from a stream, and send events synchronously to an
315  object implementing Handler concept.
316 
317  It needs to allocate a stack for storing a single decoded string during
318  non-destructive parsing.
319 
320  For in-situ parsing, the decoded string is directly written to the source
321  text string, no temporary buffer is required.
322 
323  A GenericReader object can be reused for parsing multiple JSON text.
324 
325  \tparam SourceEncoding Encoding of the input stream.
326  \tparam TargetEncoding Encoding of the parse output.
327  \tparam Allocator Allocator type for stack.
328 */
329 template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> >
331 public:
332  typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
333 
334  //! Constructor.
335  /*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
336  \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
337  */
338  GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseResult_() {}
339 
340  //! Parse JSON text.
341  /*! \tparam parseFlags Combination of \ref ParseFlag.
342  \tparam InputStream Type of input stream, implementing Stream concept.
343  \tparam Handler Type of handler, implementing Handler concept.
344  \param is Input stream to be parsed.
345  \param handler The handler to receive events.
346  \return Whether the parsing is successful.
347  */
348  template <unsigned parseFlags, typename InputStream, typename Handler>
349  ParseResult Parse(InputStream& is, Handler& handler) {
350  if (parseFlags & kParseIterativeFlag)
351  return IterativeParse<parseFlags>(is, handler);
352 
353  parseResult_.Clear();
354 
355  ClearStackOnExit scope(*this);
356 
357  SkipWhitespace(is);
358 
359  if (is.Peek() == '\0') {
360  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
361  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
362  }
363  else {
364  switch (is.Peek()) {
365  case '{': ParseObject<parseFlags>(is, handler); break;
366  case '[': ParseArray<parseFlags>(is, handler); break;
367  default: RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotObjectOrArray, is.Tell());
368  }
369  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
370 
371  if (!(parseFlags & kParseStopWhenDoneFlag)) {
372  SkipWhitespace(is);
373 
374  if (is.Peek() != '\0') {
375  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
376  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
377  }
378  }
379  }
380 
381  return parseResult_;
382  }
383 
384  //! Parse JSON text (with \ref kParseDefaultFlags)
385  /*! \tparam InputStream Type of input stream, implementing Stream concept
386  \tparam Handler Type of handler, implementing Handler concept.
387  \param is Input stream to be parsed.
388  \param handler The handler to receive events.
389  \return Whether the parsing is successful.
390  */
391  template <typename InputStream, typename Handler>
392  ParseResult Parse(InputStream& is, Handler& handler) {
393  return Parse<kParseDefaultFlags>(is, handler);
394  }
395 
396  //! Whether a parse error has occured in the last parsing.
397  bool HasParseError() const { return parseResult_.IsError(); }
398 
399  //! Get the \ref ParseErrorCode of last parsing.
400  ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
401 
402  //! Get the position of last parsing error in input, 0 otherwise.
403  size_t GetErrorOffset() const { return parseResult_.Offset(); }
404 
405 private:
406  // Prohibit copy constructor & assignment operator.
408  GenericReader& operator=(const GenericReader&);
409 
410  void ClearStack() { stack_.Clear(); }
411 
412  // clear stack on any exit from ParseStream, e.g. due to exception
413  struct ClearStackOnExit {
414  explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
415  ~ClearStackOnExit() { r_.ClearStack(); }
416  private:
417  GenericReader& r_;
418  ClearStackOnExit(const ClearStackOnExit&);
419  ClearStackOnExit& operator=(const ClearStackOnExit&);
420  };
421 
422  // Parse object: { string : value, ... }
423  template<unsigned parseFlags, typename InputStream, typename Handler>
424  void ParseObject(InputStream& is, Handler& handler) {
425  RAPIDJSON_ASSERT(is.Peek() == '{');
426  is.Take(); // Skip '{'
427 
428  if (!handler.StartObject())
429  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
430 
431  SkipWhitespace(is);
432 
433  if (is.Peek() == '}') {
434  is.Take();
435  if (!handler.EndObject(0)) // empty object
436  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
437  return;
438  }
439 
440  for (SizeType memberCount = 0;;) {
441  if (is.Peek() != '"')
442  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
443 
444  ParseString<parseFlags>(is, handler);
445  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
446 
447  SkipWhitespace(is);
448 
449  if (is.Take() != ':')
450  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
451 
452  SkipWhitespace(is);
453 
454  ParseValue<parseFlags>(is, handler);
455  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
456 
457  SkipWhitespace(is);
458 
459  ++memberCount;
460 
461  switch (is.Take()) {
462  case ',': SkipWhitespace(is); break;
463  case '}':
464  if (!handler.EndObject(memberCount))
465  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
466  else
467  return;
468  default: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
469  }
470  }
471  }
472 
473  // Parse array: [ value, ... ]
474  template<unsigned parseFlags, typename InputStream, typename Handler>
475  void ParseArray(InputStream& is, Handler& handler) {
476  RAPIDJSON_ASSERT(is.Peek() == '[');
477  is.Take(); // Skip '['
478 
479  if (!handler.StartArray())
480  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
481 
482  SkipWhitespace(is);
483 
484  if (is.Peek() == ']') {
485  is.Take();
486  if (!handler.EndArray(0)) // empty array
487  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
488  return;
489  }
490 
491  for (SizeType elementCount = 0;;) {
492  ParseValue<parseFlags>(is, handler);
493  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
494 
495  ++elementCount;
496  SkipWhitespace(is);
497 
498  switch (is.Take()) {
499  case ',': SkipWhitespace(is); break;
500  case ']':
501  if (!handler.EndArray(elementCount))
502  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
503  else
504  return;
505  default: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
506  }
507  }
508  }
509 
510  template<unsigned parseFlags, typename InputStream, typename Handler>
511  void ParseNull(InputStream& is, Handler& handler) {
512  RAPIDJSON_ASSERT(is.Peek() == 'n');
513  is.Take();
514 
515  if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') {
516  if (!handler.Null())
517  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
518  }
519  else
520  RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
521  }
522 
523  template<unsigned parseFlags, typename InputStream, typename Handler>
524  void ParseTrue(InputStream& is, Handler& handler) {
525  RAPIDJSON_ASSERT(is.Peek() == 't');
526  is.Take();
527 
528  if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') {
529  if (!handler.Bool(true))
530  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
531  }
532  else
533  RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
534  }
535 
536  template<unsigned parseFlags, typename InputStream, typename Handler>
537  void ParseFalse(InputStream& is, Handler& handler) {
538  RAPIDJSON_ASSERT(is.Peek() == 'f');
539  is.Take();
540 
541  if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') {
542  if (!handler.Bool(false))
543  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
544  }
545  else
546  RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
547  }
548 
549  // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
550  template<typename InputStream>
551  unsigned ParseHex4(InputStream& is) {
552  unsigned codepoint = 0;
553  for (int i = 0; i < 4; i++) {
554  Ch c = is.Take();
555  codepoint <<= 4;
556  codepoint += static_cast<unsigned>(c);
557  if (c >= '0' && c <= '9')
558  codepoint -= '0';
559  else if (c >= 'A' && c <= 'F')
560  codepoint -= 'A' - 10;
561  else if (c >= 'a' && c <= 'f')
562  codepoint -= 'a' - 10;
563  else {
564  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1);
565  RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
566  }
567  }
568  return codepoint;
569  }
570 
571  class StackStream {
572  public:
573  typedef typename TargetEncoding::Ch Ch;
574 
575  StackStream(internal::Stack<Allocator>& stack) : stack_(stack), length_(0) {}
576  RAPIDJSON_FORCEINLINE void Put(Ch c) {
577  *stack_.template Push<Ch>() = c;
578  ++length_;
579  }
580  internal::Stack<Allocator>& stack_;
581  SizeType length_;
582 
583  private:
584  StackStream(const StackStream&);
585  StackStream& operator=(const StackStream&);
586  };
587 
588  // Parse string and generate String event. Different code paths for kParseInsituFlag.
589  template<unsigned parseFlags, typename InputStream, typename Handler>
590  void ParseString(InputStream& is, Handler& handler) {
591  internal::StreamLocalCopy<InputStream> copy(is);
592  InputStream& s(copy.s);
593 
594  if (parseFlags & kParseInsituFlag) {
595  typename InputStream::Ch *head = s.PutBegin();
596  ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
597  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
598  size_t length = s.PutEnd(head) - 1;
599  RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
600  if (!handler.String((typename TargetEncoding::Ch*)head, SizeType(length), false))
601  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
602  }
603  else {
604  StackStream stackStream(stack_);
605  ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
606  RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
607  if (!handler.String(stack_.template Pop<typename TargetEncoding::Ch>(stackStream.length_), stackStream.length_ - 1, true))
608  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
609  }
610  }
611 
612  // Parse string to an output is
613  // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
614  template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
615  RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
616 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
617  static const char escape[256] = {
618  Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
619  Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
620  0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
621  0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
622  Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
623  };
624 #undef Z16
625 
626  RAPIDJSON_ASSERT(is.Peek() == '\"');
627  is.Take(); // Skip '\"'
628 
629  for (;;) {
630  Ch c = is.Peek();
631  if (c == '\\') { // Escape
632  is.Take();
633  Ch e = is.Take();
634  if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
635  os.Put(escape[(unsigned char)e]);
636  }
637  else if (e == 'u') { // Unicode
638  unsigned codepoint = ParseHex4(is);
639  if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
640  // Handle UTF-16 surrogate pair
641  if (is.Take() != '\\' || is.Take() != 'u')
642  RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
643  unsigned codepoint2 = ParseHex4(is);
644  if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)
645  RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
646  codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
647  }
648  TEncoding::Encode(os, codepoint);
649  }
650  else
651  RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
652  }
653  else if (c == '"') { // Closing double quote
654  is.Take();
655  os.Put('\0'); // null-terminate the string
656  return;
657  }
658  else if (c == '\0')
659  RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1);
660  else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
661  RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
662  else {
663  if (parseFlags & kParseValidateEncodingFlag ?
664  !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
666  RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
667  }
668  }
669  }
670 
671  template<unsigned parseFlags, typename InputStream, typename Handler>
672  void ParseNumber(InputStream& is, Handler& handler) {
673  internal::StreamLocalCopy<InputStream> copy(is);
674  InputStream& s(copy.s);
675 
676  // Parse minus
677  bool minus = false;
678  if (s.Peek() == '-') {
679  minus = true;
680  s.Take();
681  }
682 
683  // Parse int: zero / ( digit1-9 *DIGIT )
684  unsigned i = 0;
685  bool try64bit = false;
686  if (s.Peek() == '0') {
687  i = 0;
688  s.Take();
689  }
690  else if (s.Peek() >= '1' && s.Peek() <= '9') {
691  i = static_cast<unsigned>(s.Take() - '0');
692 
693  if (minus)
694  while (s.Peek() >= '0' && s.Peek() <= '9') {
695  if (i >= 214748364) { // 2^31 = 2147483648
696  if (i != 214748364 || s.Peek() > '8') {
697  try64bit = true;
698  break;
699  }
700  }
701  i = i * 10 + static_cast<unsigned>(s.Take() - '0');
702  }
703  else
704  while (s.Peek() >= '0' && s.Peek() <= '9') {
705  if (i >= 429496729) { // 2^32 - 1 = 4294967295
706  if (i != 429496729 || s.Peek() > '5') {
707  try64bit = true;
708  break;
709  }
710  }
711  i = i * 10 + static_cast<unsigned>(s.Take() - '0');
712  }
713  }
714  else
715  RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
716 
717  // Parse 64bit int
718  uint64_t i64 = 0;
719  bool useDouble = false;
720  if (try64bit) {
721  i64 = i;
722  if (minus)
723  while (s.Peek() >= '0' && s.Peek() <= '9') {
724  if (i64 >= UINT64_C(922337203685477580)) // 2^63 = 9223372036854775808
725  if (i64 != UINT64_C(922337203685477580) || s.Peek() > '8') {
726  useDouble = true;
727  break;
728  }
729  i64 = i64 * 10 + static_cast<unsigned>(s.Take() - '0');
730  }
731  else
732  while (s.Peek() >= '0' && s.Peek() <= '9') {
733  if (i64 >= UINT64_C(1844674407370955161)) // 2^64 - 1 = 18446744073709551615
734  if (i64 != UINT64_C(1844674407370955161) || s.Peek() > '5') {
735  useDouble = true;
736  break;
737  }
738  i64 = i64 * 10 + static_cast<unsigned>(s.Take() - '0');
739  }
740  }
741 
742  // Force double for big integer
743  double d = 0.0;
744  if (useDouble) {
745  d = (double)i64;
746  while (s.Peek() >= '0' && s.Peek() <= '9') {
747  if (d >= 1E307)
748  RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
749  d = d * 10 + (s.Take() - '0');
750  }
751  }
752 
753  // Parse frac = decimal-point 1*DIGIT
754  int expFrac = 0;
755  if (s.Peek() == '.') {
756  if (!useDouble) {
757  d = try64bit ? (double)i64 : (double)i;
758  useDouble = true;
759  }
760  s.Take();
761 
762  if (s.Peek() >= '0' && s.Peek() <= '9') {
763  d = d * 10 + (s.Take() - '0');
764  --expFrac;
765  }
766  else
767  RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
768 
769  while (s.Peek() >= '0' && s.Peek() <= '9') {
770  if (expFrac > -16) {
771  d = d * 10 + (s.Peek() - '0');
772  --expFrac;
773  }
774  s.Take();
775  }
776  }
777 
778  // Parse exp = e [ minus / plus ] 1*DIGIT
779  int exp = 0;
780  if (s.Peek() == 'e' || s.Peek() == 'E') {
781  if (!useDouble) {
782  d = try64bit ? (double)i64 : (double)i;
783  useDouble = true;
784  }
785  s.Take();
786 
787  bool expMinus = false;
788  if (s.Peek() == '+')
789  s.Take();
790  else if (s.Peek() == '-') {
791  s.Take();
792  expMinus = true;
793  }
794 
795  if (s.Peek() >= '0' && s.Peek() <= '9') {
796  exp = s.Take() - '0';
797  while (s.Peek() >= '0' && s.Peek() <= '9') {
798  exp = exp * 10 + (s.Take() - '0');
799  if (exp > 308 && !expMinus) // exp > 308 should be rare, so it should be checked first.
800  RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
801  }
802  }
803  else
804  RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
805 
806  if (expMinus)
807  exp = -exp;
808  }
809 
810  // Finish parsing, call event according to the type of number.
811  bool cont = true;
812  if (useDouble) {
813  int expSum = exp + expFrac;
814  if (expSum < -308) {
815  // Prevent expSum < -308, making Pow10(expSum) = 0
816  d *= internal::Pow10(exp);
817  d *= internal::Pow10(expFrac);
818  }
819  else
820  d *= internal::Pow10(expSum);
821 
822  cont = handler.Double(minus ? -d : d);
823  }
824  else {
825  if (try64bit) {
826  if (minus)
827  cont = handler.Int64(-(int64_t)i64);
828  else
829  cont = handler.Uint64(i64);
830  }
831  else {
832  if (minus)
833  cont = handler.Int(-(int)i);
834  else
835  cont = handler.Uint(i);
836  }
837  }
838  if (!cont)
839  RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
840  }
841 
842  // Parse any JSON value
843  template<unsigned parseFlags, typename InputStream, typename Handler>
844  void ParseValue(InputStream& is, Handler& handler) {
845  switch (is.Peek()) {
846  case 'n': ParseNull <parseFlags>(is, handler); break;
847  case 't': ParseTrue <parseFlags>(is, handler); break;
848  case 'f': ParseFalse <parseFlags>(is, handler); break;
849  case '"': ParseString<parseFlags>(is, handler); break;
850  case '{': ParseObject<parseFlags>(is, handler); break;
851  case '[': ParseArray <parseFlags>(is, handler); break;
852  default : ParseNumber<parseFlags>(is, handler);
853  }
854  }
855 
856  // Iterative Parsing
857 
// States of the iterative parser. The numeric values are consecutive from 0,
// so cIterativeParsingStateCount equals the number of states.
enum IterativeParsingState {
    IterativeParsingStartState             = 0,
    IterativeParsingFinishState            = 1,
    IterativeParsingErrorState             = 2,

    // Object states
    IterativeParsingObjectInitialState     = 3,
    IterativeParsingMemberKeyState         = 4,
    IterativeParsingKeyValueDelimiterState = 5,
    IterativeParsingMemberValueState       = 6,
    IterativeParsingMemberDelimiterState   = 7,
    IterativeParsingObjectFinishState      = 8,

    // Array states
    IterativeParsingArrayInitialState      = 9,
    IterativeParsingElementState           = 10,
    IterativeParsingElementDelimiterState  = 11,
    IterativeParsingArrayFinishState       = 12,

    cIterativeParsingStateCount            = 13
};
880 
// Token classes produced by Tokenize(). The numeric values are consecutive
// from 0, so kTokenCount equals the number of token classes.
enum Token {
    LeftBracketToken       = 0,
    RightBracketToken      = 1,

    LeftCurlyBracketToken  = 2,
    RightCurlyBracketToken = 3,

    CommaToken             = 4,
    ColonToken             = 5,

    StringToken            = 6,
    FalseToken             = 7,
    TrueToken              = 8,
    NullToken              = 9,
    NumberToken            = 10,

    kTokenCount            = 11
};
900 
901  RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
902 #define N NumberToken
903 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
904  // Maps from ASCII to Token
905  static const unsigned char tokenMap[256] = {
906  N16, // 00~0F
907  N16, // 10~1F
908  N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
909  N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
910  N16, // 40~4F
911  N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
912  N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
913  N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
914  N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
915  };
916 #undef N
917 #undef N16
918 
919  if (sizeof(Ch) == 1 || unsigned(c) < 256)
920  return (Token)tokenMap[(unsigned char)c];
921  else
922  return NumberToken;
923  }
924 
925  RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
926  // current state x one lookahead token -> new state
927  static const char G[cIterativeParsingStateCount][kTokenCount] = {
928  // Start
929  {
930  IterativeParsingArrayInitialState, // Left bracket
931  IterativeParsingErrorState, // Right bracket
932  IterativeParsingObjectInitialState, // Left curly bracket
933  IterativeParsingErrorState, // Right curly bracket
934  IterativeParsingErrorState, // Comma
935  IterativeParsingErrorState, // Colon
936  IterativeParsingErrorState, // String
937  IterativeParsingErrorState, // False
938  IterativeParsingErrorState, // True
939  IterativeParsingErrorState, // Null
940  IterativeParsingErrorState // Number
941  },
942  // Finish(sink state)
943  {
944  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
945  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
946  IterativeParsingErrorState
947  },
948  // Error(sink state)
949  {
950  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
951  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
952  IterativeParsingErrorState
953  },
954  // ObjectInitial
955  {
956  IterativeParsingErrorState, // Left bracket
957  IterativeParsingErrorState, // Right bracket
958  IterativeParsingErrorState, // Left curly bracket
959  IterativeParsingObjectFinishState, // Right curly bracket
960  IterativeParsingErrorState, // Comma
961  IterativeParsingErrorState, // Colon
962  IterativeParsingMemberKeyState, // String
963  IterativeParsingErrorState, // False
964  IterativeParsingErrorState, // True
965  IterativeParsingErrorState, // Null
966  IterativeParsingErrorState // Number
967  },
968  // MemberKey
969  {
970  IterativeParsingErrorState, // Left bracket
971  IterativeParsingErrorState, // Right bracket
972  IterativeParsingErrorState, // Left curly bracket
973  IterativeParsingErrorState, // Right curly bracket
974  IterativeParsingErrorState, // Comma
975  IterativeParsingKeyValueDelimiterState, // Colon
976  IterativeParsingErrorState, // String
977  IterativeParsingErrorState, // False
978  IterativeParsingErrorState, // True
979  IterativeParsingErrorState, // Null
980  IterativeParsingErrorState // Number
981  },
982  // KeyValueDelimiter
983  {
984  IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
985  IterativeParsingErrorState, // Right bracket
986  IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
987  IterativeParsingErrorState, // Right curly bracket
988  IterativeParsingErrorState, // Comma
989  IterativeParsingErrorState, // Colon
990  IterativeParsingMemberValueState, // String
991  IterativeParsingMemberValueState, // False
992  IterativeParsingMemberValueState, // True
993  IterativeParsingMemberValueState, // Null
994  IterativeParsingMemberValueState // Number
995  },
996  // MemberValue
997  {
998  IterativeParsingErrorState, // Left bracket
999  IterativeParsingErrorState, // Right bracket
1000  IterativeParsingErrorState, // Left curly bracket
1001  IterativeParsingObjectFinishState, // Right curly bracket
1002  IterativeParsingMemberDelimiterState, // Comma
1003  IterativeParsingErrorState, // Colon
1004  IterativeParsingErrorState, // String
1005  IterativeParsingErrorState, // False
1006  IterativeParsingErrorState, // True
1007  IterativeParsingErrorState, // Null
1008  IterativeParsingErrorState // Number
1009  },
1010  // MemberDelimiter
1011  {
1012  IterativeParsingErrorState, // Left bracket
1013  IterativeParsingErrorState, // Right bracket
1014  IterativeParsingErrorState, // Left curly bracket
1015  IterativeParsingErrorState, // Right curly bracket
1016  IterativeParsingErrorState, // Comma
1017  IterativeParsingErrorState, // Colon
1018  IterativeParsingMemberKeyState, // String
1019  IterativeParsingErrorState, // False
1020  IterativeParsingErrorState, // True
1021  IterativeParsingErrorState, // Null
1022  IterativeParsingErrorState // Number
1023  },
1024  // ObjectFinish(sink state)
1025  {
1026  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1027  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1028  IterativeParsingErrorState
1029  },
1030  // ArrayInitial
1031  {
1032  IterativeParsingArrayInitialState, // Left bracket(push Element state)
1033  IterativeParsingArrayFinishState, // Right bracket
1034  IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1035  IterativeParsingErrorState, // Right curly bracket
1036  IterativeParsingErrorState, // Comma
1037  IterativeParsingErrorState, // Colon
1038  IterativeParsingElementState, // String
1039  IterativeParsingElementState, // False
1040  IterativeParsingElementState, // True
1041  IterativeParsingElementState, // Null
1042  IterativeParsingElementState // Number
1043  },
1044  // Element
1045  {
1046  IterativeParsingErrorState, // Left bracket
1047  IterativeParsingArrayFinishState, // Right bracket
1048  IterativeParsingErrorState, // Left curly bracket
1049  IterativeParsingErrorState, // Right curly bracket
1050  IterativeParsingElementDelimiterState, // Comma
1051  IterativeParsingErrorState, // Colon
1052  IterativeParsingErrorState, // String
1053  IterativeParsingErrorState, // False
1054  IterativeParsingErrorState, // True
1055  IterativeParsingErrorState, // Null
1056  IterativeParsingErrorState // Number
1057  },
1058  // ElementDelimiter
1059  {
1060  IterativeParsingArrayInitialState, // Left bracket(push Element state)
1061  IterativeParsingErrorState, // Right bracket
1062  IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1063  IterativeParsingErrorState, // Right curly bracket
1064  IterativeParsingErrorState, // Comma
1065  IterativeParsingErrorState, // Colon
1066  IterativeParsingElementState, // String
1067  IterativeParsingElementState, // False
1068  IterativeParsingElementState, // True
1069  IterativeParsingElementState, // Null
1070  IterativeParsingElementState // Number
1071  },
1072  // ArrayFinish(sink state)
1073  {
1074  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1075  IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1076  IterativeParsingErrorState
1077  }
1078  }; // End of G
1079 
1080  return (IterativeParsingState)G[state][token];
1081  }
1082 
1083  // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1084  // May return a new state on state pop.
1085  template <unsigned parseFlags, typename InputStream, typename Handler>
1086  RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1087  switch (dst) {
1088  case IterativeParsingStartState:
1089  RAPIDJSON_ASSERT(false);
1090  return IterativeParsingErrorState;
1091 
1092  case IterativeParsingFinishState:
1093  return dst;
1094 
1095  case IterativeParsingErrorState:
1096  return dst;
1097 
1098  case IterativeParsingObjectInitialState:
1099  case IterativeParsingArrayInitialState:
1100  {
1101  // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1102  // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1103  IterativeParsingState n = src;
1104  if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1105  n = IterativeParsingElementState;
1106  else if (src == IterativeParsingKeyValueDelimiterState)
1107  n = IterativeParsingMemberValueState;
1108  // Push current state.
1109  *stack_.template Push<SizeType>(1) = n;
1110  // Initialize and push the member/element count.
1111  *stack_.template Push<SizeType>(1) = 0;
1112  // Call handler
1113  bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1114  // On handler short circuits the parsing.
1115  if (!hr) {
1116  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1117  return IterativeParsingErrorState;
1118  }
1119  else {
1120  is.Take();
1121  return dst;
1122  }
1123  }
1124 
1125  case IterativeParsingMemberKeyState:
1126  ParseString<parseFlags>(is, handler);
1127  if (HasParseError())
1128  return IterativeParsingErrorState;
1129  else
1130  return dst;
1131 
1132  case IterativeParsingKeyValueDelimiterState:
1133  if (token == ColonToken) {
1134  is.Take();
1135  return dst;
1136  }
1137  else
1138  return IterativeParsingErrorState;
1139 
1140  case IterativeParsingMemberValueState:
1141  // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1142  ParseValue<parseFlags>(is, handler);
1143  if (HasParseError()) {
1144  return IterativeParsingErrorState;
1145  }
1146  return dst;
1147 
1148  case IterativeParsingElementState:
1149  // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1150  ParseValue<parseFlags>(is, handler);
1151  if (HasParseError()) {
1152  return IterativeParsingErrorState;
1153  }
1154  return dst;
1155 
1156  case IterativeParsingMemberDelimiterState:
1157  case IterativeParsingElementDelimiterState:
1158  is.Take();
1159  // Update member/element count.
1160  *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1161  return dst;
1162 
1163  case IterativeParsingObjectFinishState:
1164  {
1165  // Get member count.
1166  SizeType c = *stack_.template Pop<SizeType>(1);
1167  // If the object is not empty, count the last member.
1168  if (src == IterativeParsingMemberValueState)
1169  ++c;
1170  // Restore the state.
1171  IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1172  // Transit to Finish state if this is the topmost scope.
1173  if (n == IterativeParsingStartState)
1174  n = IterativeParsingFinishState;
1175  // Call handler
1176  bool hr = handler.EndObject(c);
1177  // On handler short circuits the parsing.
1178  if (!hr) {
1179  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1180  return IterativeParsingErrorState;
1181  }
1182  else {
1183  is.Take();
1184  return n;
1185  }
1186  }
1187 
1188  case IterativeParsingArrayFinishState:
1189  {
1190  // Get element count.
1191  SizeType c = *stack_.template Pop<SizeType>(1);
1192  // If the array is not empty, count the last element.
1193  if (src == IterativeParsingElementState)
1194  ++c;
1195  // Restore the state.
1196  IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1197  // Transit to Finish state if this is the topmost scope.
1198  if (n == IterativeParsingStartState)
1199  n = IterativeParsingFinishState;
1200  // Call handler
1201  bool hr = handler.EndArray(c);
1202  // On handler short circuits the parsing.
1203  if (!hr) {
1204  RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1205  return IterativeParsingErrorState;
1206  }
1207  else {
1208  is.Take();
1209  return n;
1210  }
1211  }
1212 
1213  default:
1214  RAPIDJSON_ASSERT(false);
1215  return IterativeParsingErrorState;
1216  }
1217  }
1218 
1219  template <typename InputStream>
1220  void HandleError(IterativeParsingState src, InputStream& is) {
1221  if (HasParseError()) {
1222  // Error flag has been set.
1223  return;
1224  }
1225 
1226  switch (src) {
1227  case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(is.Peek() == '\0' ? kParseErrorDocumentEmpty : kParseErrorDocumentRootNotObjectOrArray, is.Tell());
1228  case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell());
1229  case IterativeParsingObjectInitialState:
1230  case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
1231  case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
1232  case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
1233  case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
1234  default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
1235  }
1236  }
1237 
1238  template <unsigned parseFlags, typename InputStream, typename Handler>
1239  ParseResult IterativeParse(InputStream& is, Handler& handler) {
1240  parseResult_.Clear();
1241  ClearStackOnExit scope(*this);
1242  IterativeParsingState state = IterativeParsingStartState;
1243 
1244  SkipWhitespace(is);
1245  while (is.Peek() != '\0') {
1246  Token t = Tokenize(is.Peek());
1247  IterativeParsingState n = Predict(state, t);
1248  IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1249 
1250  if (d == IterativeParsingErrorState) {
1251  HandleError(state, is);
1252  break;
1253  }
1254 
1255  state = d;
1256 
1257  // Do not further consume streams if a root JSON has been parsed.
1258  if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1259  break;
1260 
1261  SkipWhitespace(is);
1262  }
1263 
1264  // Handle the end of file.
1265  if (state != IterativeParsingFinishState)
1266  HandleError(state, is);
1267 
1268  return parseResult_;
1269  }
1270 
1271  static const size_t kDefaultStackCapacity = 256; //!< Default capacity in bytes of the internal stack; used as the default for the constructor's stackCapacity argument.
1272  internal::Stack<Allocator> stack_; //!< Internal stack: stores decoded strings temporarily during non-destructive parsing; the iterative parser also keeps the saved states and member/element counts of open containers on it.
1273  ParseResult parseResult_; //!< Result of the current/last parse; set through the RAPIDJSON_PARSE_ERROR macros and cleared at the start of IterativeParse().
1274 }; // class GenericReader
1275 
1276 //! Reader with UTF8 encoding and default allocator.
1278 
1279 } // namespace rapidjson
1280 
1281 #ifdef _MSC_VER
1282 RAPIDJSON_DIAG_POP
1283 #endif
1284 
1285 #endif // RAPIDJSON_READER_H_
Result of parsing (wraps ParseErrorCode)
Definition: error.h:78
The document root must not be followed by other values.
Definition: error.h:41
Iterative(constant complexity in terms of function call stack size) parsing.
Definition: reader.h:68
Concept for receiving events from GenericReader upon parsing. The functions return true if no error o...
Validate encoding of JSON strings.
Definition: reader.h:67
SAX-style JSON parser. Use Reader for UTF8 encoding and default allocator.
Definition: reader.h:330
SourceEncoding::Ch Ch
SourceEncoding character type.
Definition: reader.h:332
unsigned SizeType
Use 32-bit array/string indices even for 64-bit platform, instead of using size_t.
Definition: rapidjson.h:133
size_t GetErrorOffset() const
Get the position of last parsing error in input, 0 otherwise.
Definition: reader.h:403
ParseErrorCode GetParseErrorCode() const
Get the ParseErrorCode of last parsing.
Definition: reader.h:400
Invalid value.
Definition: error.h:43
Missing fraction part in number.
Definition: error.h:58
The document is empty.
Definition: error.h:39
The surrogate pair in string is invalid.
Definition: error.h:52
GenericInsituStringStream< UTF8<> > InsituStringStream
Insitu string stream with UTF8 encoding.
Definition: rapidjson.h:385
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text (with kParseDefaultFlags)
Definition: reader.h:392
Missing a colon after a name of object member.
Definition: error.h:46
Missing a closing quotation mark in string.
Definition: error.h:54
Invalid escape character in string.
Definition: error.h:53
Missing a name for object member.
Definition: error.h:45
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:146
Number too big to be stored in double.
Definition: error.h:57
After parsing a complete JSON root from stream, stop further processing the rest of stream...
Definition: reader.h:69
GenericReader(Allocator *allocator=0, size_t stackCapacity=kDefaultStackCapacity)
Constructor.
Definition: reader.h:338
void SkipWhitespace(InputStream &is)
Skip the JSON white spaces in a stream.
Definition: reader.h:169
GenericStringStream< UTF8<> > StringStream
String stream with UTF8 encoding.
Definition: rapidjson.h:344
Invalid encoding in string.
Definition: error.h:55
bool IsError() const
Whether the result is an error.
Definition: error.h:93
bool HasParseError() const
Whether a parse error has occurred in the last parsing.
Definition: reader.h:397
main RapidJSON namespace
Definition: allocators.h:6
Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer...
Definition: reader.h:65
ParseFlag
Combination of parseFlags.
Definition: reader.h:64
Concept for reading and writing characters.
ParseErrorCode Code() const
Get the error code.
Definition: error.h:86
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text.
Definition: reader.h:349
The document root must be either object or array.
Definition: error.h:40
Missing a comma or '}' after an object member.
Definition: error.h:47
Default implementation of Handler.
Definition: reader.h:106
common definitions and configuration
In-situ(destructive) parsing.
Definition: reader.h:66
ParseErrorCode
Error code of parsing.
Definition: error.h:36
UTF-8 encoding.
Definition: encodings.h:81
Unspecific syntax error.
Definition: error.h:62
Concept for allocating, resizing and freeing memory block.
Parsing was terminated.
Definition: error.h:61
size_t Offset() const
Get the error offset, if IsError(), 0 otherwise.
Definition: error.h:88
GenericReader< UTF8<>, UTF8<> > Reader
Reader with UTF8 encoding and default allocator.
Definition: reader.h:1277
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the outp...
Definition: encodings.h:574
Incorrect hex digit after \u escape in string.
Definition: error.h:51
Missing exponent in number.
Definition: error.h:59
Missing a comma or ']' after an array element.
Definition: error.h:49
void Clear()
Reset error code.
Definition: error.h:100