libdap Updated for version 3.20.10
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24//
25// Portions of this code were taken verbatim from Josuttis,
26// "The C++ Standard Library," p.672
27
28#include "config.h"
29
30#include <stdint.h>
31#include <arpa/inet.h>
32
33#include <cstring>
34#include <vector>
35
36#include "chunked_stream.h"
37#include "chunked_istream.h"
38
39#include "Error.h"
40
41//#define DODS_DEBUG
42//#define DODS_DEBUG2
43#ifdef DODS_DEBUG
44#include <iostream>
45#endif
46
47#include "util.h"
48#include "debug.h"
49
50namespace libdap {
51
/*
 This code does not use a 'put back' buffer, but here's a picture of the
 d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
 the I/O Stream library's streambuf class works. For the case with no
 putback, just imagine it as zero and eliminate the leftmost extension. This
 might also come in useful if the code was extended to support put back. I
 removed that feature because I don't see it being used with our chunked
 transmission protocol and it requires an extra call to memcpy() when data
 are added to the internal buffer.

 d_buffer  d_buffer + putBack
 |         |
 v         v
 |---------|--------------------------------------------|....
 |         |                                            |   .
 |---------|--------------------------------------------|....
           ^                         ^                  ^
           |                         |                  |
           eback()                   gptr()             egptr()

 */
73
83std::streambuf::int_type
85{
86 DBG(cerr << "underflow..." << endl);
87 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
88
89 // return the next character; uflow() increments the puffer pointer.
90 if (gptr() < egptr())
91 return traits_type::to_int_type(*gptr());
92
93 // gptr() == egptr() so read more data from the underlying input source.
94
95 // To read data from the chunked stream, first read the header
96 uint32_t header;
97 d_is.read((char *) &header, 4);
98
99 // When the endian nature of the server is encoded in the chunk header, the header is
100 // sent using network byte order
101 header = ntohl(header);
102
103 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
104 // it holds data. In the latter case, bytes those will be read and moved into the
105 // buffer. Once those data are consumed, we'll be back here again and this read()
106 // will return EOF. See below for the other case...
107 if (d_is.eof()) return traits_type::eof();
108
109 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
110 if (!d_set_twiddle) {
111 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
112 d_set_twiddle = true;
113 }
114
115 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
116
117 DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
118 DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
119 DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
120
121 // Handle the case where the buffer is not big enough to hold the incoming chunk
122 if (chunk_size > d_buf_size) {
123 d_buf_size = chunk_size;
124 m_buffer_alloc();
125 }
126
127 // If the END chunk has zero bytes, return EOF. See above for more information
128 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
129
130 // Read the chunk's data
131 d_is.read(d_buffer, chunk_size);
132 DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
133 if (d_is.bad()) return traits_type::eof();
134
135 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
136 setg(d_buffer, // beginning of put back area
137 d_buffer, // read position (gptr() == eback())
138 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
139
140 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
141
142 switch (header & CHUNK_TYPE_MASK) {
143 case CHUNK_END:
144 DBG2(cerr << "Found end chunk" << endl);
145 return traits_type::to_int_type(*gptr());
146 case CHUNK_DATA:
147 return traits_type::to_int_type(*gptr());
148
149 case CHUNK_ERR:
150 // this is pretty much the end of the show... Assume the buffer/chunk holds
151 // the error message text.
152 d_error = true;
153 d_error_message = string(d_buffer, chunk_size);
154 return traits_type::eof();
155 default:
156 d_error = true;
157 d_error_message = "Failed to read known chunk header type.";
158 return traits_type::eof();
159 }
160}
161
178std::streamsize
179chunked_inbuf::xsgetn(char* s, std::streamsize num)
180{
181 DBG(cerr << "xsgetn... num: " << num << endl);
182
183 // if num is <= the chars currently in the buffer
184 if (num <= (egptr() - gptr())) {
185 memcpy(s, gptr(), num);
186 gbump(num);
187
188 return traits_type::not_eof(num);
189 }
190
191 // else they asked for more
192 uint32_t bytes_left_to_read = num;
193
194 // are there any bytes in the buffer? if so grab them first
195 if (gptr() < egptr()) {
196 int bytes_to_transfer = egptr() - gptr();
197 memcpy(s, gptr(), bytes_to_transfer);
198 gbump(bytes_to_transfer);
199 s += bytes_to_transfer;
200 bytes_left_to_read -= bytes_to_transfer;
201 }
202
203 // We need to get more bytes from the underlying stream; at this
204 // point the internal buffer is empty.
205
206 // read the remaining bytes to transfer, a chunk at a time,
207 // and put any leftover stuff in the buffer.
208
209 // note that when the code is here, gptr() == egptr(), so the
210 // next call to read() will fall through the previous tests and
211 // read at least one chunk here.
212 bool done = false;
213 while (!done) {
214 // Get a chunk header
215 uint32_t header;
216 d_is.read((char *) &header, 4);
217
218 header = ntohl(header);
219
220 // There are two EOF cases: One where the END chunk is zero bytes and one where
221 // it holds data. In the latter case, those will be read and moved into the
222 // buffer. Once those data are consumed, we'll be back here again and this read()
223 // will return EOF. See below for the other case...
224 if (d_is.eof()) return traits_type::eof();
225
226 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
227 if (!d_set_twiddle) {
228 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
229 d_set_twiddle = true;
230 }
231
232 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
233 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
234 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
235 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
236
237 // handle error chunks here
238 if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
239 d_error = true;
240 // Note that d_buffer is not used to avoid calling resize if it is too
241 // small to hold the error message. At this point, there's not much reason
242 // to optimize transport efficiency, however.
243 std::vector<char> message(chunk_size);
244 d_is.read(message.data(), chunk_size);
245 d_error_message = string(message.data(), chunk_size);
246 // leave the buffer and gptr(), ..., in a consistent state (empty)
247 setg(d_buffer, d_buffer, d_buffer);
248 }
249 // And zero-length END chunks here.
250 else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
251 return traits_type::not_eof(num-bytes_left_to_read);
252 }
253 // The next case is complicated because we read some data from the current
254 // chunk into 's' an some into the internal buffer.
255 else if (chunk_size > bytes_left_to_read) {
256 d_is.read(s, bytes_left_to_read);
257 if (d_is.bad()) return traits_type::eof();
258
259 // Now slurp up the remain part of the chunk and store it in the buffer
260 uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
261 // expand the internal buffer if needed
262 if (bytes_leftover > d_buf_size) {
263 d_buf_size = chunk_size;
264 m_buffer_alloc();
265 }
266 // read the remain stuff in to d_buffer
267 d_is.read(d_buffer, bytes_leftover);
268 if (d_is.bad()) return traits_type::eof();
269
270 setg(d_buffer, // beginning of put back area
271 d_buffer, // read position (gptr() == eback())
272 d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
273
274 bytes_left_to_read = 0 /* -= d_is.gcount()*/;
275 }
276 else {
277 // expand the internal buffer if needed
278 if (chunk_size > d_buf_size) {
279 d_buf_size = chunk_size;
280 m_buffer_alloc();
281 }
282 // If we get a chunk that's zero bytes, Don't call read()
283 // to save the kernel context switch overhead.
284 if (chunk_size > 0) {
285 d_is.read(s, chunk_size);
286 if (d_is.bad()) return traits_type::eof();
287 bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
288 s += chunk_size;
289 }
290 }
291
292 switch (header & CHUNK_TYPE_MASK) {
293 case CHUNK_END:
294 DBG(cerr << "Found end chunk" << endl);
295 // in this case bytes_left_to_read can be > 0 because we ran out of data
296 // before reading all the requested bytes. The next read() call will return
297 // eof; this call returns the number of bytes read and transferred to 's'.
298 done = true;
299 break;
300
301 case CHUNK_DATA:
302 done = bytes_left_to_read == 0;
303 break;
304
305 case CHUNK_ERR:
306 // this is pretty much the end of the show... The error message has
307 // already been read above
308 return traits_type::eof();
309
310 default:
311 d_error = true;
312 d_error_message = "Failed to read known chunk header type.";
313 return traits_type::eof();
314 }
315 }
316
317 return traits_type::not_eof(num-bytes_left_to_read);
318}
319
332std::streambuf::int_type
334{
335 // To read data from the chunked stream, first read the header
336 uint32_t header;
337 d_is.read((char *) &header, 4);
338
339 header = ntohl(header);
340
341 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
342 // it holds data. In the latter case, bytes those will be read and moved into the
343 // buffer. Once those data are consumed, we'll be back here again and this read()
344 // will return EOF. See below for the other case...
345 if (d_is.eof()) return traits_type::eof();
346
347 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
348 if (!d_set_twiddle) {
349 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
350 d_set_twiddle = true;
351 }
352
353 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
354
355 DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
356 DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
357 DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
358
359 // Handle the case where the buffer is not big enough to hold the incoming chunk
360 if (chunk_size > d_buf_size) {
361 d_buf_size = chunk_size;
362 m_buffer_alloc();
363 }
364
365 // If the END chunk has zero bytes, return EOF. See above for more information
366 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
367
368 // Read the chunk's data
369 d_is.read(d_buffer, chunk_size);
370 DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
371 if (d_is.bad()) return traits_type::eof();
372
373 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
374 setg(d_buffer, // beginning of put back area
375 d_buffer, // read position (gptr() == eback())
376 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
377
378 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
379
380 switch (header & CHUNK_TYPE_MASK) {
381 case CHUNK_END:
382 DBG(cerr << "Found end chunk" << endl);
383 return traits_type::not_eof(chunk_size);
384
385 case CHUNK_DATA:
386 return traits_type::not_eof(chunk_size);
387
388 case CHUNK_ERR:
389 // this is pretty much the end of the show... Assume the buffer/chunk holds
390 // the error message text.
391 d_error = true;
392 d_error_message = string(d_buffer, chunk_size);
393 return traits_type::eof();
394
395 default:
396 d_error = true;
397 d_error_message = "Failed to read known chunk header type.";
398 return traits_type::eof();
399 }
400}
401
402}
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
top level DAP object to house generic methods
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition util.cc:94