MMTF-C++
The C++ language MMTF libraries
binary_decoder.hpp
Go to the documentation of this file.
1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The authors of this code are: Gerardo Tauriello, and Daniel Farrell.
6 //
7 // Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
8 // Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
9 // Gazal Kalyan, Alexander Rose
10 //
11 // *************************************************************************
12 
13 #ifndef MMTF_BINARY_DECODER_H
14 #define MMTF_BINARY_DECODER_H
15 
16 #include "structure_data.hpp"
17 #include "errors.hpp"
18 
19 #include <msgpack.hpp>
20 #include <cstring> // low level mem
21 #include <sstream>
22 #include <limits>
23 #include <algorithm>
24 
25 namespace mmtf {
26 
31 public:
41  BinaryDecoder(const msgpack::object& obj,
42  const std::string& key = "UNNAMED_BINARY");
43 
53  BinaryDecoder(const std::string& str,
54  const std::string& key = "UNNAMED_BINARY");
55 
71  template<typename T>
72  void decode(T& target) const;
73 
74 private:
75  // for error reporting
76  std::string key_;
77  // data from binary header
78  int32_t strategy_;
79  int32_t length_;
80  int32_t parameter_;
81  const char* encodedData_;
82  uint32_t encodedDataLength_; // max. size for binary is 2^32 - 1
83 
84  // helper function for constructors
85  void
86  initFromData(const char * str_data,
87  const std::size_t len);
88 
89  // check length consistency (throws)
90  void checkLength_(int32_t exp_length) const;
91  // check if binary data is divisible by x (throws)
92  void checkDivisibleBy_(int32_t item_size) const;
93 
94  // byte decoders
95  void decodeFromBytes_(std::vector<float>& output) const;
96  void decodeFromBytes_(std::vector<int8_t>& output) const;
97  void decodeFromBytes_(std::vector<int16_t>& output) const;
98  void decodeFromBytes_(std::vector<int32_t>& output) const;
99  // special one: decode to vector of strings
100  void decodeFromBytes_(std::vector<std::string>& output) const;
101 
102  // run length decoding
103  // -> Int and IntOut can be any integer types
104  // -> Int values are blindly converted to IntOut
105  template<typename Int, typename IntOut>
106  void runLengthDecode_(const std::vector<Int>& input,
107  std::vector<IntOut>& output) const;
108 
109  // delta decoding -> Int can be any integer type
110  template<typename Int>
111  void deltaDecode_(const std::vector<Int>& input, std::vector<Int>& output) const;
112  // variant doing it in-place
113  template<typename Int>
114  void deltaDecode_(std::vector<Int>& in_out) const;
115 
116  // recursive indexing decode -> SmallInt must be smaller than Int
117  template<typename SmallInt, typename Int>
118  void recursiveIndexDecode_(const std::vector<SmallInt>& input,
119  std::vector<Int>& output) const;
120 
121  // decode integer to float -> Int can be any integer type
122  template<typename Int>
123  void decodeDivide_(const std::vector<Int>& input, float const divisor,
124  std::vector<float>& output) const;
125 };
126 
127 // *************************************************************************
128 // IMPLEMENTATION
129 // *************************************************************************
130 
131 // helpers in anonymous namespace (only visible in this file)
132 namespace {
133 
134 // byteorder functions ("ntohl" etc.)
135 #ifdef WIN32
136 #include <winsock2.h>
137 #else
138 #include <arpa/inet.h>
139 #endif
140 
141 #ifndef __EMSCRIPTEN__
// Reads the 4 bytes at src as a big-endian 32-bit value and stores that value
// in host byte order into the 4 bytes at dst.
// -> dst may point to any 4-byte object; it is written with memcpy, so no
//    alignment is required
// -> the value is assembled byte by byte, so the result is the same on little
//    and big endian hosts and no platform byteorder function (ntohl) is needed
inline void assignBigendian4(void* dst, const char* src) {
  // go through unsigned char to avoid sign extension of bytes >= 0x80
  const unsigned char* in = reinterpret_cast<const unsigned char*>(src);
  const uint32_t value = (static_cast<uint32_t>(in[0]) << 24)
                       | (static_cast<uint32_t>(in[1]) << 16)
                       | (static_cast<uint32_t>(in[2]) << 8)
                       |  static_cast<uint32_t>(in[3]);
  std::memcpy(dst, &value, sizeof(uint32_t));
}
148 
// Reads the 2 bytes at src as a big-endian 16-bit value and stores that value
// in host byte order into the 2 bytes at dst.
// -> dst may point to any 2-byte object; it is written with memcpy, so no
//    alignment is required
// -> the value is assembled byte by byte, so the result is the same on little
//    and big endian hosts and no platform byteorder function (ntohs) is needed
inline void assignBigendian2(void* dst, const char* src) {
  // go through unsigned char to avoid sign extension of bytes >= 0x80
  const unsigned char* in = reinterpret_cast<const unsigned char*>(src);
  const uint16_t value = static_cast<uint16_t>((in[0] << 8) | in[1]);
  std::memcpy(dst, &value, sizeof(uint16_t));
}
155 #else
156 // Need to avoid how emscripten handles memory
157 // Note that this will only work on little endian machines, but this should not be
158 // a major issue as Emscripten only supports little endian hardware.
159 // see: https://kripken.github.io/emscripten-site/docs/porting/guidelines/portability_guidelines.html
160 
// Emscripten variant: copies the 4 bytes at src to dst in reversed order,
// one byte at a time (dst[0] receives src[3], ..., dst[3] receives src[0]).
void assignBigendian4(void* dst, const char* src) {
  uint8_t* const out = static_cast<uint8_t*>(dst);
  for (int k = 0; k < 4; ++k) {
    out[k] = src[3 - k];
  }
}
167 
// Emscripten variant: copies the 2 bytes at src to dst in swapped order,
// one byte at a time (dst[0] receives src[1], dst[1] receives src[0]).
void assignBigendian2(void* dst, const char* src) {
  uint8_t* const out = static_cast<uint8_t*>(dst);
  for (int k = 0; k < 2; ++k) {
    out[k] = src[1 - k];
  }
}
172 #endif
173 
174 void arrayCopyBigendian4(void* dst, const char* src, size_t n) {
175  for (size_t i = 0; i < n; i += 4) {
176  assignBigendian4(((char*)dst) + i, src + i);
177  }
178 }
179 
180 void arrayCopyBigendian2(void* dst, const char* src, size_t n) {
181  for (size_t i = 0; i < n; i += 2) {
182  assignBigendian2(((char*)dst) + i, src + i);
183  }
184 }
185 
186 } // anon ns
187 
188 
189 // note this does not set key_, you must set it in ctor
190 inline void BinaryDecoder::initFromData(const char * bytes, std::size_t const len) {
191  assignBigendian4(&strategy_, bytes);
192  assignBigendian4(&length_, bytes + 4);
193  assignBigendian4(&parameter_, bytes + 8);
194  encodedData_ = bytes + 12;
195  encodedDataLength_ = len - 12;
196 }
197 
198 inline BinaryDecoder::BinaryDecoder(const msgpack::object& obj,
199  const std::string& key)
200  : key_(key) {
201  // sanity checks
202  if (obj.type != msgpack::type::BIN) {
203  throw DecodeError("The '" + key + "' entry is not binary data");
204  }
205  if (obj.via.bin.size < 12) {
206  std::stringstream err;
207  err << "The '" + key + "' entry is too short " << obj.via.bin.size;
208  throw DecodeError(err.str());
209  }
210  this->initFromData(obj.via.bin.ptr, obj.via.bin.size);
211 }
212 
213 inline BinaryDecoder::BinaryDecoder(const std::string& str,
214  const std::string& key)
215  : key_(key) {
216  this->initFromData(str.data(), str.size());
217 }
218 
219 template<typename T>
220 void BinaryDecoder::decode(T&) const {
221  throw mmtf::DecodeError("Invalid target type for binary '" + key_ + "'");
222 }
223 
224 template<>
225 inline void BinaryDecoder::decode(std::vector<float>& output) const {
226 
227  // check strategy to parse
228  switch (strategy_) {
229  case 1: {
230  decodeFromBytes_(output);
231  break;
232  }
233  case 9: {
234  std::vector<int32_t> step1;
235  std::vector<int32_t> step2;
236  decodeFromBytes_(step1);
237  runLengthDecode_(step1, step2);
238  decodeDivide_(step2, static_cast<float>(parameter_), output);
239  break;
240  }
241  case 10: {
242  std::vector<int16_t> step1;
243  std::vector<int32_t> step2;
244  decodeFromBytes_(step1);
245  recursiveIndexDecode_(step1, step2);
246  deltaDecode_(step2);
247  decodeDivide_(step2, static_cast<float>(parameter_), output);
248  break;
249  }
250  case 11: {
251  std::vector<int16_t> step1;
252  decodeFromBytes_(step1);
253  decodeDivide_(step1, static_cast<float>(parameter_), output);
254  break;
255  }
256  case 12: {
257  std::vector<int16_t> step1;
258  std::vector<int32_t> step2;
259  decodeFromBytes_(step1);
260  recursiveIndexDecode_(step1, step2);
261  decodeDivide_(step2, static_cast<float>(parameter_), output);
262  break;
263  }
264  case 13: {
265  std::vector<int8_t> step1;
266  std::vector<int32_t> step2;
267  decodeFromBytes_(step1);
268  recursiveIndexDecode_(step1, step2);
269  decodeDivide_(step2, static_cast<float>(parameter_), output);
270  break;
271  }
272  default: {
273  std::stringstream err;
274  err << "Invalid strategy " << strategy_ << " for binary '" + key_
275  << "': does not decode to float array";
276  throw DecodeError(err.str());
277  }
278  }
279 
280  // check size
281  checkLength_(output.size());
282 }
283 
284 template<>
285 inline void BinaryDecoder::decode(std::vector<int8_t>& output) const {
286 
287  // check strategy to parse
288  switch (strategy_) {
289  case 2: {
290  decodeFromBytes_(output);
291  break;
292  }
293  case 16: {
294  std::vector<int32_t> step1;
295  decodeFromBytes_(step1);
296  runLengthDecode_(step1, output);
297  break;
298  }
299  default: {
300  std::stringstream err;
301  err << "Invalid strategy " << strategy_ << " for binary '" + key_
302  << "': does not decode to int8 array";
303  throw DecodeError(err.str());
304  }
305  }
306 
307  // check size
308  checkLength_(output.size());
309 }
310 
311 template<>
312 inline void BinaryDecoder::decode(std::vector<int16_t>& output) const {
313 
314  // check strategy to parse
315  switch (strategy_) {
316  case 3: {
317  decodeFromBytes_(output);
318  break;
319  }
320  default: {
321  std::stringstream err;
322  err << "Invalid strategy " << strategy_ << " for binary '" + key_
323  << "': does not decode to int16 array";
324  throw DecodeError(err.str());
325  }
326  }
327 
328  // check size
329  checkLength_(output.size());
330 }
331 
332 template<>
333 inline void BinaryDecoder::decode(std::vector<int32_t>& output) const {
334 
335  // check strategy to parse
336  switch (strategy_) {
337  case 4: {
338  decodeFromBytes_(output);
339  break;
340  }
341  case 7: {
342  std::vector<int32_t> step1;
343  decodeFromBytes_(step1);
344  runLengthDecode_(step1, output);
345  break;
346  }
347  case 8: {
348  std::vector<int32_t> step1;
349  decodeFromBytes_(step1);
350  runLengthDecode_(step1, output);
351  deltaDecode_(output);
352  break;
353  }
354  case 14: {
355  std::vector<int16_t> step1;
356  decodeFromBytes_(step1);
357  recursiveIndexDecode_(step1, output);
358  break;
359  }
360  case 15: {
361  std::vector<int8_t> step1;
362  decodeFromBytes_(step1);
363  recursiveIndexDecode_(step1, output);
364  break;
365  }
366  default: {
367  std::stringstream err;
368  err << "Invalid strategy " << strategy_ << " for binary '" + key_
369  << "': does not decode to int32 array";
370  throw DecodeError(err.str());
371  }
372  }
373 
374  // check size
375  checkLength_(output.size());
376 }
377 
378 template<>
379 inline void BinaryDecoder::decode(std::vector<std::string>& output) const {
380 
381  // check strategy to parse
382  switch (strategy_) {
383  case 5: {
384  decodeFromBytes_(output);
385  break;
386  }
387  default: {
388  std::stringstream err;
389  err << "Invalid strategy " << strategy_ << " for binary '" + key_
390  << "': does not decode to string array";
391  throw DecodeError(err.str());
392  }
393  }
394 
395  // check size
396  checkLength_(output.size());
397 }
398 
399 template<>
400 inline void BinaryDecoder::decode(std::vector<char>& output) const {
401 
402  // check strategy to parse
403  switch (strategy_) {
404  case 6: {
405  std::vector<int32_t> step1;
406  decodeFromBytes_(step1);
407  runLengthDecode_(step1, output);
408  break;
409  }
410  default: {
411  std::stringstream err;
412  err << "Invalid strategy " << strategy_ << " for binary '" + key_
413  << "': does not decode to string array";
414  throw DecodeError(err.str());
415  }
416  }
417 
418  // check size
419  checkLength_(output.size());
420 }
421 
422 // checks
423 inline void BinaryDecoder::checkLength_(int32_t exp_length) const {
424  if (length_ != exp_length) {
425  std::stringstream err;
426  err << "Length mismatch for binary '" + key_ + "': "
427  << length_ << " vs " << exp_length;
428  throw DecodeError(err.str());
429  }
430 }
431 
432 inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) const {
433  if (encodedDataLength_ % item_size != 0) {
434  std::stringstream err;
435  err << "Binary length of '" + key_ + "': "
436  << encodedDataLength_ << " is not a multiple of " << item_size;
437  throw DecodeError(err.str());
438  }
439 }
440 
441 // byte decoders
442 inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) const {
443  checkDivisibleBy_(4);
444  // prepare memory
445  output.resize(encodedDataLength_ / 4);
446  // get data
447  if(!output.empty()) {
448  arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
449  }
450 }
451 inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) const {
452  // prepare memory
453  output.resize(encodedDataLength_);
454  // get data
455  if (!output.empty()) {
456  memcpy(&output[0], encodedData_, encodedDataLength_);
457  }
458 }
459 inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) const {
460  checkDivisibleBy_(2);
461  // prepare memory
462  output.resize(encodedDataLength_ / 2);
463  // get data
464  if (!output.empty()) {
465  arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
466  }
467 }
468 inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) const {
469  checkDivisibleBy_(4);
470  // prepare memory
471  output.resize(encodedDataLength_ / 4);
472  // get data
473  if (!output.empty()) {
474  arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
475  }
476 }
477 // special one: decode to vector of strings
478 inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) const {
479  char NULL_BYTE = 0x00;
480  // check parameter
481  const int32_t str_len = parameter_;
482  checkDivisibleBy_(str_len);
483  // prepare memory
484  output.resize(encodedDataLength_ / str_len);
485  // get data
486  for (size_t i = 0; i < output.size(); ++i) {
487  output[i].assign(encodedData_ + i * str_len, str_len);
488  output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
489  }
490 }
491 
492 // run length decoding
493 template<typename Int, typename IntOut>
494 void BinaryDecoder::runLengthDecode_(const std::vector<Int>& input,
495  std::vector<IntOut>& output) const {
496  // we work with pairs of numbers
497  checkDivisibleBy_(2);
498  // find out size of resulting vector (for speed)
499  size_t out_size = 0;
500  for (size_t i = 0; i < input.size(); i += 2) {
501  out_size += input[i + 1];
502  }
503  // reserve space (for speed)
504  output.clear();
505  output.reserve(out_size);
506  // get data
507  for (size_t i = 0; i < input.size(); i += 2) {
508  const IntOut value = IntOut(input[i]);
509  const Int number = input[i+1];
510  for (Int j = 0; j < number; ++j) {
511  output.push_back(value);
512  }
513  }
514 }
515 
516 // delta decoding
517 template<typename Int>
518 void BinaryDecoder::deltaDecode_(const std::vector<Int>& input,
519  std::vector<Int>& output) const {
520  // reserve space (for speed)
521  output.clear();
522  if (input.empty()) return; // ensure we have some values
523  output.reserve(input.size());
524  // get data
525  output.push_back(input[0]);
526  for (size_t i = 1; i < input.size(); ++i) {
527  output.push_back(output[i - 1] + input[i]);
528  }
529 }
530 template<typename Int>
531 void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) const {
532  for (size_t i = 1; i < in_out.size(); ++i) {
533  in_out[i] = in_out[i - 1] + in_out[i];
534  }
535 }
536 
537 // recursive indexing decode
538 template<typename SmallInt, typename Int>
539 void BinaryDecoder::recursiveIndexDecode_(const std::vector<SmallInt>& input,
540  std::vector<Int>& output) const {
541  // get limits
542  const SmallInt min_int = std::numeric_limits<SmallInt>::min();
543  const SmallInt max_int = std::numeric_limits<SmallInt>::max();
544  // find out size of resulting vector (for speed)
545  size_t out_size = 0;
546  for (size_t i = 0; i < input.size(); ++i) {
547  if (input[i] != min_int && input[i] != max_int) ++out_size;
548  }
549  // reserve space (for speed)
550  output.clear();
551  output.reserve(out_size);
552  // get data
553  Int cur_val = 0;
554  for (size_t i = 0; i < input.size(); ++i) {
555  cur_val += input[i];
556  if (input[i] != min_int && input[i] != max_int) {
557  output.push_back(cur_val);
558  cur_val = 0;
559  }
560  }
561 }
562 
563 // decode integer to float
564 template<typename Int>
565 void BinaryDecoder::decodeDivide_(const std::vector<Int>& input, float const divisor,
566  std::vector<float>& output) const {
567  // reserve space and get inverted divisor (for speed)
568  output.clear();
569  output.reserve(input.size());
570  float inv_div = float(1) / divisor;
571  // get data
572  for (size_t i = 0; i < input.size(); ++i) {
573  output.push_back(float(input[i]) * inv_div);
574  }
575 }
576 
577 } // mmtf namespace
578 
579 #endif
Helper class to decode msgpack binary into a vector.
Definition: binary_decoder.hpp:30
BinaryDecoder(const msgpack::object &obj, const std::string &key="UNNAMED_BINARY")
Initialize object given a msgpack object. Reads out binary header to prepare for call of decode.
Definition: binary_decoder.hpp:198
void decode(T &target) const
Decode binary msgpack object into the given target.
Definition: binary_decoder.hpp:220
Exception thrown when failing during decoding.
Definition: errors.hpp:23
Definition: binary_decoder.hpp:25