Libparserutils
inputstream.h
Go to the documentation of this file.
1 /*
2  * This file is part of LibParserUtils.
3  * Licensed under the MIT License,
4  * http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
6  */
7 
8 #ifndef parserutils_input_inputstream_h_
9 #define parserutils_input_inputstream_h_
10 
11 #ifdef __cplusplus
12 extern "C"
13 {
14 #endif
15 
16 #include <stdbool.h>
17 #ifndef NDEBUG
18 #include <stdio.h>
19 #endif
20 #include <stdlib.h>
21 #include <inttypes.h>
22 
23 #include <parserutils/errors.h>
24 #include <parserutils/functypes.h>
25 #include <parserutils/types.h>
28 
33  const uint8_t *data, size_t len,
34  uint16_t *mibenum, uint32_t *source);
35 
39 typedef struct parserutils_inputstream
40 {
43  uint32_t cursor;
45  bool had_eof;
47 
48 /* Create an input stream */
50  uint32_t encsrc, parserutils_charset_detect_func csdetect,
51  parserutils_inputstream **stream);
52 /* Destroy an input stream */
54  parserutils_inputstream *stream);
55 
56 /* Append data to an input stream */
59  const uint8_t *data, size_t len);
60 /* Insert data into stream at current location */
63  const uint8_t *data, size_t len);
64 
65 /* Slow form of css_inputstream_peek. */
67  parserutils_inputstream *stream,
68  size_t offset, const uint8_t **ptr, size_t *length);
69 
92  parserutils_inputstream *stream, size_t offset,
93  const uint8_t **ptr, size_t *length)
94 {
96  const parserutils_buffer *utf8;
97  const uint8_t *utf8_data;
98  size_t len, off, utf8_len;
99 
100  if (stream == NULL || ptr == NULL || length == NULL)
101  return PARSERUTILS_BADPARM;
102 
103 #ifndef NDEBUG
104 #ifdef VERBOSE_INPUTSTREAM
105  fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",
106  stream->utf8->length, stream->cursor, offset);
107 #endif
108 #ifdef RANDOMISE_INPUTSTREAM
110 #endif
111 #endif
112 
113  utf8 = stream->utf8;
114  utf8_data = utf8->data;
115  utf8_len = utf8->length;
116  off = stream->cursor + offset;
117 
118 #define IS_ASCII(x) (((x) & 0x80) == 0)
119 
120  if (off < utf8_len) {
121  if (IS_ASCII(utf8_data[off])) {
122  /* Early exit for ASCII case */
123  (*length) = 1;
124  (*ptr) = (utf8_data + off);
125  return PARSERUTILS_OK;
126  } else {
128  utf8_data + off, &len);
129 
130  if (error == PARSERUTILS_OK) {
131  (*length) = len;
132  (*ptr) = (utf8_data + off);
133  return PARSERUTILS_OK;
134  } else if (error != PARSERUTILS_NEEDDATA) {
135  return error;
136  }
137  }
138  }
139 
140 #undef IS_ASCII
141 
142  return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
143 }
144 
152  parserutils_inputstream *stream, size_t bytes)
153 {
154  if (stream == NULL)
155  return;
156 
157 #if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
158  fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",
159  stream->utf8->length, stream->cursor, bytes);
160 #endif
161 
162  if (bytes > stream->utf8->length - stream->cursor)
163  bytes = stream->utf8->length - stream->cursor;
164 
165  if (stream->cursor == stream->utf8->length)
166  return;
167 
168  stream->cursor += bytes;
169 }
170 
171 /* Read the document charset */
173  parserutils_inputstream *stream, uint32_t *source);
174 /* Change the document charset */
176  parserutils_inputstream *stream,
177  const char *enc, uint32_t source);
178 
179 #ifdef __cplusplus
180 }
181 #endif
182 
183 #endif
184 
parserutils_buffer_randomise
parserutils_error parserutils_buffer_randomise(parserutils_buffer *buffer)
Definition: buffer.c:162
PARSERUTILS_BADPARM
@ PARSERUTILS_BADPARM
Definition: errors.h:22
parserutils_inputstream::utf8
parserutils_buffer * utf8
Buffer containing UTF-8 data.
Definition: inputstream.h:41
types.h
parserutils_buffer
Definition: buffer.h:19
parserutils_buffer::data
uint8_t * data
Definition: buffer.h:21
parserutils_inputstream
struct parserutils_inputstream parserutils_inputstream
Input stream object.
parserutils_inputstream_create
parserutils_error parserutils_inputstream_create(const char *enc, uint32_t encsrc, parserutils_charset_detect_func csdetect, parserutils_inputstream **stream)
Create an input stream.
Definition: inputstream.c:59
parserutils_inputstream_destroy
parserutils_error parserutils_inputstream_destroy(parserutils_inputstream *stream)
Destroy an input stream.
Definition: inputstream.c:144
parserutils_inputstream::had_eof
bool had_eof
Whether EOF has been reached.
Definition: inputstream.h:45
PARSERUTILS_OK
@ PARSERUTILS_OK
Definition: errors.h:19
parserutils_inputstream_advance
static void parserutils_inputstream_advance(parserutils_inputstream *stream, size_t bytes)
Advance the stream's current position.
Definition: inputstream.h:151
errors.h
len
size_t len
Definition: codec_8859.c:23
parserutils_inputstream_peek_slow
parserutils_error parserutils_inputstream_peek_slow(parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor (slow version)
Definition: inputstream.c:232
parserutils_inputstream_insert
parserutils_error parserutils_inputstream_insert(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Insert data into stream at current location.
Definition: inputstream.c:195
parserutils_inputstream::cursor
uint32_t cursor
Byte offset of current position.
Definition: inputstream.h:43
parserutils_buffer::length
size_t length
Definition: buffer.h:22
buffer.h
parserutils_charset_detect_func
parserutils_error(* parserutils_charset_detect_func)(const uint8_t *data, size_t len, uint16_t *mibenum, uint32_t *source)
Type of charset detection function.
Definition: inputstream.h:32
parserutils_inputstream
Input stream object.
Definition: inputstream.h:39
functypes.h
utf8.h
parserutils_error
parserutils_error
Definition: errors.h:18
PARSERUTILS_NEEDDATA
@ PARSERUTILS_NEEDDATA
Definition: errors.h:25
parserutils_inputstream_peek
static parserutils_error parserutils_inputstream_peek(parserutils_inputstream *stream, size_t offset, const uint8_t **ptr, size_t *length)
Look at the character in the stream that starts at offset bytes from the cursor.
Definition: inputstream.h:91
parserutils_inputstream_append
parserutils_error parserutils_inputstream_append(parserutils_inputstream *stream, const uint8_t *data, size_t len)
Append data to an input stream.
Definition: inputstream.c:169
parserutils_inputstream_read_charset
const char * parserutils_inputstream_read_charset(parserutils_inputstream *stream, uint32_t *source)
Read the source charset of the input stream.
Definition: inputstream.c:292
parserutils_inputstream_change_charset
parserutils_error parserutils_inputstream_change_charset(parserutils_inputstream *stream, const char *enc, uint32_t source)
Change the source charset of the input stream.
Definition: inputstream.c:321
IS_ASCII
#define IS_ASCII(x)
parserutils_charset_utf8_char_byte_length
parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s, size_t *len)
Calculate the length (in bytes) of a UTF-8 character.
Definition: utf8.c:107