19 using namespace shogun;
33 CFile(fname, rw, name)
/** Initialises the chunking parameters used by this file object.
 *
 * Sets message_size to 1024*1024 and obtains `buffer` from SG_MALLOC with
 * message_size*sizeof(uint32_t) elements of type uint8_t.
 */
43 void CProtobufFile::init()
46 message_size=1024*1024;
48 buffer=SG_MALLOC(uint8_t, message_size*
sizeof(uint32_t));
/** Generates CProtobufFile::get_vector() for element type sg_type.
 *
 * The generated function validates the global header against
 * ShogunVersion::VECTOR, reads the vector header, stores its len() in `len`
 * and then fills `vector` through read_memory_block() using the header's
 * num_messages().
 */
51 #define GET_VECTOR(sg_type) \
52 void CProtobufFile::get_vector(sg_type*& vector, int32_t& len) \
54 read_and_validate_global_header(ShogunVersion::VECTOR); \
55 VectorHeader data_header=read_vector_header(); \
56 len=data_header.len(); \
57 read_memory_block(vector, len, data_header.num_messages()); \
/** Generates CProtobufFile::get_matrix() for element type sg_type.
 *
 * The generated function validates the global header against
 * ShogunVersion::MATRIX, reads the matrix header, takes num_feat from
 * num_cols() and num_vec from num_rows(), and reads num_feat*num_vec
 * elements into `matrix` through read_memory_block().
 *
 * The read_func parameter is not referenced in the lines visible here.
 *
 * NOTE(review): num_feat*num_vec is evaluated in int32_t before it is passed
 * on as the element count; large matrices may overflow -- confirm.
 */
74 #define GET_MATRIX(read_func, sg_type) \
75 void CProtobufFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
77 read_and_validate_global_header(ShogunVersion::MATRIX); \
78 MatrixHeader data_header=read_matrix_header(); \
79 num_feat=data_header.num_cols(); \
80 num_vec=data_header.num_rows(); \
81 read_memory_block(matrix, num_feat*num_vec, data_header.num_messages()); \
/** Generates CProtobufFile::get_ndarray() for element type sg_type.
 *
 * Only the signature is visible here: the function receives the output
 * array, its dimension list and the number of dimensions by reference.
 */
98 #define GET_NDARRAY(read_func, sg_type) \
99 void CProtobufFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \
/** Generates CProtobufFile::get_sparse_matrix() for element type sg_type.
 *
 * The generated function validates the global header against
 * ShogunVersion::SPARSE_MATRIX, reads the sparse matrix header, copies
 * num_features() and num_vectors() into num_feat and num_vec, and hands the
 * header to read_sparse_matrix() to populate `matrix`.
 */
113 #define GET_SPARSE_MATRIX(sg_type) \
114 void CProtobufFile::get_sparse_matrix( \
115 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
117 read_and_validate_global_header(ShogunVersion::SPARSE_MATRIX); \
118 SparseMatrixHeader data_header=read_sparse_matrix_header(); \
119 num_feat=data_header.num_features(); \
120 num_vec=data_header.num_vectors(); \
121 read_sparse_matrix(matrix, data_header); \
137 #undef GET_SPARSE_MATRIX
/** Generates CProtobufFile::set_vector() for element type sg_type.
 *
 * The generated function derives the number of chunk messages from len and
 * sizeof(sg_type), then writes, in this order: the global header tagged
 * ShogunVersion::VECTOR, the vector header, and the data itself through
 * write_memory_block().
 */
139 #define SET_VECTOR(sg_type) \
140 void CProtobufFile::set_vector(const sg_type* vector, int32_t len) \
142 int32_t num_messages=compute_num_messages(len, sizeof(sg_type)); \
143 write_global_header(ShogunVersion::VECTOR); \
144 write_vector_header(len, num_messages); \
145 write_memory_block(vector, len, num_messages); \
/** Generates CProtobufFile::set_matrix() for element type sg_type.
 *
 * The generated function derives the number of chunk messages from
 * num_feat*num_vec and sizeof(sg_type), then writes, in this order: the
 * global header tagged ShogunVersion::MATRIX, the matrix header, and the
 * num_feat*num_vec elements through write_memory_block().
 *
 * NOTE(review): num_feat*num_vec is evaluated in int32_t before widening to
 * the uint64_t parameters it is passed to; large matrices may overflow --
 * confirm.
 */
162 #define SET_MATRIX(sg_type) \
163 void CProtobufFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
165 int32_t num_messages=compute_num_messages(num_feat*num_vec, sizeof(sg_type)); \
166 write_global_header(ShogunVersion::MATRIX); \
167 write_matrix_header(num_feat, num_vec, num_messages); \
168 write_memory_block(matrix, num_feat*num_vec, num_messages); \
/** Generates CProtobufFile::set_sparse_matrix() for element type sg_type.
 *
 * The generated function writes, in this order: the global header tagged
 * ShogunVersion::SPARSE_MATRIX, the sparse matrix header built from the
 * matrix and its dimensions, and the matrix entries through
 * write_sparse_matrix().
 */
185 #define SET_SPARSE_MATRIX(sg_type) \
186 void CProtobufFile::set_sparse_matrix( \
187 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
189 write_global_header(ShogunVersion::SPARSE_MATRIX); \
190 write_sparse_matrix_header(matrix, num_feat, num_vec); \
191 write_sparse_matrix(matrix, num_vec); \
207 #undef SET_SPARSE_MATRIX
/** Generates CProtobufFile::get_string_list() for character type sg_type.
 *
 * The generated function validates the global header against
 * ShogunVersion::STRING_LIST, reads the string list header, copies
 * num_str() and max_string_len() into the output parameters, and hands the
 * header to read_string_list() to populate `strings`.
 */
209 #define GET_STRING_LIST(sg_type) \
210 void CProtobufFile::get_string_list( \
211 SGString<sg_type>*& strings, int32_t& num_str, \
212 int32_t& max_string_len) \
214 read_and_validate_global_header(ShogunVersion::STRING_LIST); \
215 StringListHeader data_header=read_string_list_header(); \
216 num_str=data_header.num_str(); \
217 max_string_len=data_header.max_string_len(); \
218 read_string_list(strings, data_header); \
233 #undef GET_STRING_LIST
/** Generates CProtobufFile::set_string_list() for character type sg_type.
 *
 * The generated function writes, in this order: the global header tagged
 * ShogunVersion::STRING_LIST, the string list header built from the strings,
 * and the string contents through write_string_list().
 */
235 #define SET_STRING_LIST(sg_type) \
236 void CProtobufFile::set_string_list( \
237 const SGString<sg_type>* strings, int32_t num_str) \
239 write_global_header(ShogunVersion::STRING_LIST); \
240 write_string_list_header(strings, num_str); \
241 write_string_list(strings, num_str); \
256 #undef SET_STRING_LIST
/** Stores a 32-bit unsigned integer most-significant byte first.
 *
 * @param number value to encode
 * @param array destination; array[0] receives bits 31..24, array[3] bits 7..0
 * @param size number of bytes available in array
 *
 * NOTE(review): the condition guarding the SG_ERROR call is not visible
 * here; presumably it compares size against the four bytes written --
 * confirm.
 */
258 void CProtobufFile::write_big_endian_uint(uint32_t number, uint8_t* array, uint32_t size)
261 SG_ERROR(
"array is too small to write\n");
// each byte is isolated with a shift and an 0xff mask
263 array[0]=(number>>24)&0xffu;
264 array[1]=(number>>16)&0xffu;
265 array[2]=(number>>8)&0xffu;
266 array[3]=number&0xffu;
269 uint32_t CProtobufFile::read_big_endian_uint(uint8_t* array, uint32_t size)
272 SG_ERROR(
"array is too small to read\n");
274 return (array[0]<<24) | (array[1]<<16) | (array[2]<<8) | array[3];
277 int32_t CProtobufFile::compute_num_messages(uint64_t len, int32_t sizeof_type)
const
279 uint32_t elements_in_message=message_size/sizeof_type;
280 uint32_t num_messages=len/elements_in_message;
281 if (len % elements_in_message > 0)
290 read_message(header);
300 write_message(header);
306 read_message(data_header);
314 read_message(data_header);
322 read_message(data_header);
330 read_message(data_header);
/** Emits the header message that precedes vector data.
 *
 * @param len number of elements in the vector
 * @param num_messages number of chunk messages that follow
 *
 * The header object is sent with write_message(); how the two parameters
 * are stored in it is not visible here.
 */
335 void CProtobufFile::write_vector_header(int32_t len, int32_t num_messages)
340 write_message(data_header);
/** Emits the header message that precedes matrix data.
 *
 * @param num_feat first matrix dimension as passed by set_matrix()
 * @param num_vec second matrix dimension as passed by set_matrix()
 * @param num_messages number of chunk messages that follow
 *
 * The header object is sent with write_message(); how the parameters are
 * stored in it is not visible here.
 */
343 void CProtobufFile::write_matrix_header(int32_t num_feat, int32_t num_vec, int32_t num_messages)
349 write_message(data_header);
/** Generates CProtobufFile::write_sparse_matrix_header() for sg_type.
 *
 * The generated function fills a SparseMatrixHeader with num_feat and
 * num_vec, appends matrix[i].num_feat_entries for every vector i in
 * [0, num_vec), and sends the header with write_message().
 */
352 #define WRITE_SPARSE_MATRIX_HEADER(sg_type) \
353 void CProtobufFile::write_sparse_matrix_header( \
354 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
356 SparseMatrixHeader data_header; \
357 data_header.set_num_features(num_feat); \
358 data_header.set_num_vectors(num_vec); \
359 for (int32_t i=0; i<num_vec; i++) \
361 data_header.add_num_feat_entries(matrix[i].num_feat_entries); \
364 write_message(data_header); \
380 #undef WRITE_SPARSE_MATRIX_HEADER
/** Generates CProtobufFile::write_string_list_header() for sg_type.
 *
 * The generated function fills a StringListHeader with num_str, appends
 * strings[i].slen for every string while tracking the largest slen seen
 * (starting from 0), stores that maximum as max_string_len, and sends the
 * header with write_message().
 */
382 #define WRITE_STRING_LIST_HEADER(sg_type) \
383 void CProtobufFile::write_string_list_header(const SGString<sg_type>* strings, int32_t num_str) \
385 int32_t max_string_len=0; \
386 StringListHeader data_header; \
387 data_header.set_num_str(num_str); \
388 for (int32_t i=0; i<num_str; i++) \
390 data_header.add_str_len(strings[i].slen); \
391 if (strings[i].slen>max_string_len) \
392 max_string_len=strings[i].slen; \
394 data_header.set_max_string_len(max_string_len); \
395 write_message(data_header); \
410 #undef WRITE_STRING_LIST_HEADER
/** Reads one length-prefixed protobuf message from `file`.
 *
 * @param message object that receives the parsed contents
 *
 * The message is preceded by its size, stored as sizeof(uint32_t) bytes and
 * decoded with read_big_endian_uint(). Short reads, a zero size and parse
 * failures are reported through REQUIRE.
 *
 * NOTE(review): msg_size is taken from the file and used as the fread()
 * count into `buffer`; no comparison against the buffer's allocated size is
 * visible here -- confirm a bound check exists.
 */
412 void CProtobufFile::read_message(google::protobuf::Message& message)
414 uint32_t bytes_read=0;
// read the size prefix
418 bytes_read=fread(uint_buffer,
sizeof(
char),
sizeof(uint32_t),
file);
419 REQUIRE(bytes_read==
sizeof(uint32_t),
"IO error\n");
420 msg_size=read_big_endian_uint(uint_buffer,
sizeof(uint32_t));
421 REQUIRE(msg_size>0,
"message size should be more than zero\n");
// read the serialized payload
424 bytes_read=fread(buffer,
sizeof(
char), msg_size,
file);
425 REQUIRE(bytes_read==msg_size,
"IO error\n");
// decode the payload into the caller's message object
428 REQUIRE(message.ParseFromArray(buffer, msg_size),
"cannot parse header\n");
/** Writes one length-prefixed protobuf message to `file`.
 *
 * @param message object to serialize
 *
 * The size reported by ByteSize() is encoded with write_big_endian_uint()
 * and written first, followed by the payload serialized into `buffer`.
 * Short writes are reported through REQUIRE.
 *
 * NOTE(review): the result of SerializeToArray() is not checked and
 * msg_size is not compared against the allocated size of `buffer` in the
 * lines visible here -- confirm.
 */
431 void CProtobufFile::write_message(
const google::protobuf::Message& message)
433 uint32_t bytes_write=0;
434 uint32_t msg_size=message.ByteSize();
// write the size prefix
437 write_big_endian_uint(msg_size, uint_buffer,
sizeof(uint32_t));
438 bytes_write=fwrite(uint_buffer,
sizeof(
char),
sizeof(uint32_t),
file);
439 REQUIRE(bytes_write==
sizeof(uint32_t),
"IO error\n");
// write the serialized payload
442 message.SerializeToArray(buffer, msg_size);
443 bytes_write=fwrite(buffer,
sizeof(
char), msg_size,
file);
444 REQUIRE(bytes_write==msg_size,
"IO error\n");
/** Generates CProtobufFile::read_memory_block() for one element type.
 *
 * @param chunk_type protobuf message type whose repeated `data` field
 *        carries the elements
 * @param sg_type element type of the destination array
 *
 * The generated function allocates `vector` with room for len elements and
 * fills it from num_messages consecutive chunk messages. Every message
 * contributes at most message_size/sizeof(sg_type) elements; the last one
 * contributes only what is still missing. A chunk that carries fewer
 * elements than required is rejected through REQUIRE.
 */
#define READ_MEMORY_BLOCK(chunk_type, sg_type) \
void CProtobufFile::read_memory_block(sg_type*& vector, uint64_t len, int32_t num_messages) \
{ \
	vector=SG_MALLOC(sg_type, len); \
	\
	chunk_type chunk; \
	const uint64_t elements_in_message=message_size/sizeof(sg_type); \
	for (int32_t i=0; i<num_messages; i++) \
	{ \
		read_message(chunk); \
		\
		/* len is unsigned, so the remaining count must be derived */ \
		/* without subtracting past zero; the offset is kept in 64 */ \
		/* bits so that large arrays do not overflow the product */ \
		const uint64_t offset=static_cast<uint64_t>(i)*elements_in_message; \
		const uint64_t remaining=(offset<len) ? len-offset : 0; \
		const int32_t num_elements_to_read=static_cast<int32_t>( \
			(remaining<elements_in_message) ? remaining : elements_in_message); \
		\
		/* never index past what the chunk actually carries */ \
		REQUIRE(chunk.data_size()>=num_elements_to_read, \
			"message contains fewer elements than expected\n"); \
		\
		for (int32_t j=0; j<num_elements_to_read; j++) \
			vector[offset+j]=chunk.data(j); \
	} \
}
481 #undef READ_MEMORY_BLOCK
/** Generates CProtobufFile::write_memory_block() for one element type.
 *
 * @param chunk_type protobuf message type whose repeated `data` field
 *        carries the elements
 * @param sg_type element type of the source array
 *
 * The generated function sends the len elements of `vector` as num_messages
 * consecutive chunk messages. Every message carries at most
 * message_size/sizeof(sg_type) elements; the last one carries only what is
 * left, so the source array is never read past its end.
 */
#define WRITE_MEMORY_BLOCK(chunk_type, sg_type) \
void CProtobufFile::write_memory_block(const sg_type* vector, uint64_t len, int32_t num_messages) \
{ \
	chunk_type chunk; \
	const uint64_t elements_in_message=message_size/sizeof(sg_type); \
	for (int32_t i=0; i<num_messages; i++) \
	{ \
		/* start every message empty so earlier elements are not re-sent */ \
		chunk.Clear(); \
		\
		/* len is unsigned, so the remaining count must be derived */ \
		/* without subtracting past zero; the offset is kept in 64 */ \
		/* bits so that large arrays do not overflow the product */ \
		const uint64_t offset=static_cast<uint64_t>(i)*elements_in_message; \
		const uint64_t remaining=(offset<len) ? len-offset : 0; \
		const int32_t num_elements_to_write=static_cast<int32_t>( \
			(remaining<elements_in_message) ? remaining : elements_in_message); \
		\
		for (int32_t j=0; j<num_elements_to_write; j++) \
			chunk.add_data(vector[offset+j]); \
		\
		write_message(chunk); \
	} \
}
517 #undef WRITE_MEMORY_BLOCK
/** Generates CProtobufFile::read_sparse_matrix() for one element type.
 *
 * @param chunk_type protobuf message type carrying the entry values
 * @param sg_type element type of the sparse vectors
 *
 * The generated function allocates num_vectors() sparse vectors, sizes each
 * one from the header's num_feat_entries(i), and fills feat_index and entry
 * from two parallel chunk streams: a UInt64Chunk with the feature indices
 * and a chunk_type with the values. Both chunks are read once up front and
 * again whenever buffer_counter reaches message_size/sizeof(sg_type).
 *
 * NOTE(review): the statements that advance and reset buffer_counter are
 * not visible here -- confirm they mirror write_sparse_matrix().
 */
519 #define READ_SPARSE_MATRIX(chunk_type, sg_type) \
520 void CProtobufFile::read_sparse_matrix( \
521 SGSparseVector<sg_type>*& matrix, const SparseMatrixHeader& data_header) \
523 matrix=SG_MALLOC(SGSparseVector<sg_type>, data_header.num_vectors()); \
525 UInt64Chunk feat_index_chunk; \
526 chunk_type entry_chunk; \
527 read_message(feat_index_chunk); \
528 read_message(entry_chunk); \
530 int32_t elements_in_message=message_size/sizeof(sg_type); \
531 int32_t buffer_counter=0; \
532 for (uint32_t i=0; i<data_header.num_vectors(); i++) \
534 matrix[i]=SGSparseVector<sg_type>(data_header.num_feat_entries(i)); \
535 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \
537 matrix[i].features[j].feat_index=feat_index_chunk.data(buffer_counter); \
538 matrix[i].features[j].entry=entry_chunk.data(buffer_counter); \
541 if (buffer_counter==elements_in_message) \
543 read_message(feat_index_chunk); \
544 read_message(entry_chunk); \
564 #undef READ_SPARSE_MATRIX
/** Generates CProtobufFile::write_sparse_matrix() for one element type.
 *
 * @param chunk_type protobuf message type carrying the entry values
 * @param sg_type element type of the sparse vectors
 *
 * The generated function walks every entry of every vector and appends its
 * feat_index to a UInt64Chunk and its value to a chunk_type. Whenever
 * buffer_counter reaches message_size/sizeof(sg_type) both chunks are
 * written and cleared; a final pair is written if buffer_counter is
 * non-zero after the loops.
 *
 * NOTE(review): the statements that advance and reset buffer_counter are
 * not visible here -- confirm.
 */
566 #define WRITE_SPARSE_MATRIX(chunk_type, sg_type) \
567 void CProtobufFile::write_sparse_matrix( \
568 const SGSparseVector<sg_type>* matrix, int32_t num_vec) \
570 UInt64Chunk feat_index_chunk; \
571 chunk_type entry_chunk; \
572 int32_t elements_in_message=message_size/sizeof(sg_type); \
573 int32_t buffer_counter=0; \
574 for (int32_t i=0; i<num_vec; i++) \
576 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \
578 feat_index_chunk.add_data(matrix[i].features[j].feat_index); \
579 entry_chunk.add_data(matrix[i].features[j].entry); \
582 if (buffer_counter==elements_in_message) \
584 write_message(feat_index_chunk); \
585 write_message(entry_chunk); \
586 feat_index_chunk.Clear(); \
587 entry_chunk.Clear(); \
593 if (buffer_counter!=0) \
595 write_message(feat_index_chunk); \
596 write_message(entry_chunk); \
613 #undef WRITE_SPARSE_MATRIX
/** Generates CProtobufFile::read_string_list() for one character type.
 *
 * @param chunk_type protobuf message type carrying the characters
 * @param sg_type character type of the strings
 *
 * The generated function allocates num_str() strings, sizes each one from
 * the header's str_len(i), and copies characters out of the current chunk
 * at position buffer_counter. A chunk is read once up front and again
 * whenever buffer_counter reaches message_size/sizeof(sg_type).
 *
 * NOTE(review): the declaration of `chunk` and the statements that advance
 * and reset buffer_counter are not visible here -- confirm.
 */
615 #define READ_STRING_LIST(chunk_type, sg_type) \
616 void CProtobufFile::read_string_list( \
617 SGString<sg_type>*& strings, const StringListHeader& data_header) \
619 strings=SG_MALLOC(SGString<sg_type>, data_header.num_str()); \
622 read_message(chunk); \
623 int32_t elements_in_message=message_size/sizeof(sg_type); \
624 int32_t buffer_counter=0; \
625 for (uint32_t i=0; i<data_header.num_str(); i++) \
627 strings[i]=SGString<sg_type>(data_header.str_len(i)); \
628 for (int32_t j=0; j<strings[i].slen; j++) \
630 strings[i].string[j]=chunk.data(buffer_counter); \
633 if (buffer_counter==elements_in_message) \
635 read_message(chunk); \
654 #undef READ_STRING_LIST
/** Generates CProtobufFile::write_string_list() for one character type.
 *
 * @param chunk_type protobuf message type carrying the characters
 * @param sg_type character type of the strings
 *
 * The generated function appends every character of every string to a
 * chunk, writes the chunk whenever buffer_counter reaches
 * message_size/sizeof(sg_type), and writes a final chunk if buffer_counter
 * is non-zero after the loops.
 *
 * NOTE(review): the declaration of `chunk`, and the statements that clear
 * it and advance/reset buffer_counter after a flush, are not visible here
 * -- confirm.
 */
656 #define WRITE_STRING_LIST(chunk_type, sg_type) \
657 void CProtobufFile::write_string_list( \
658 const SGString<sg_type>* strings, int32_t num_str) \
661 int32_t elements_in_message=message_size/sizeof(sg_type); \
662 int32_t buffer_counter=0; \
663 for (int32_t i=0; i<num_str; i++) \
665 for (int32_t j=0; j<strings[i].slen; j++) \
667 chunk.add_data(strings[i].string[j]); \
670 if (buffer_counter==elements_in_message) \
672 write_message(chunk); \
679 if (buffer_counter!=0) \
680 write_message(chunk); \
695 #undef WRITE_STRING_LIST
inline::shogun::ShogunVersion_SGDataType data_type() const
void set_data_type(::shogun::ShogunVersion_SGDataType value)
#define SET_MATRIX(sg_type)
#define GET_MATRIX(read_func, sg_type)
#define GET_STRING_LIST(sg_type)
#define SET_STRING_LIST(sg_type)
#define GET_NDARRAY(read_func, sg_type)
#define GET_SPARSE_MATRIX(sg_type)
#define GET_VECTOR(sg_type)
#define READ_STRING_LIST(chunk_type, sg_type)
#define WRITE_STRING_LIST(chunk_type, sg_type)
#define SET_VECTOR(sg_type)
void set_version(::google::protobuf::int32 value)
A File access base class.
#define WRITE_SPARSE_MATRIX(chunk_type, sg_type)
#define WRITE_SPARSE_MATRIX_HEADER(sg_type)
inline::google::protobuf::int32 version() const
#define WRITE_STRING_LIST_HEADER(sg_type)
#define WRITE_MEMORY_BLOCK(chunk_type, sg_type)
#define READ_SPARSE_MATRIX(chunk_type, sg_type)
#define READ_MEMORY_BLOCK(chunk_type, sg_type)
#define SET_SPARSE_MATRIX(sg_type)