19 using namespace shogun;
24 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n")
// GET_VECTOR(fname, conv, sg_type) expands to a CStreamingAsciiFile::get_vector()
// overload for element type sg_type. The generated function reads one line
// from buf, cuts it into blank-separated items (each one copied by
// append_item) and stores conv(item) for items 0..num_feat-1 in vector.
//   vector   - in/out: destination array; grown with SG_REALLOC only when the
//              value of num_feat on entry is smaller than the new num_feat.
//   num_feat - in/out: its value on entry is used as the old array length.
//              Presumably set to the number of items before the conversion
//              loop -- TODO confirm, the assignment is not in this excerpt.
// NOTE(review): fname is not referenced by any of the lines shown here.
40 #define GET_VECTOR(fname, conv, sg_type) \
41 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \
43 char* buffer = NULL; \
45 int32_t old_len = num_feat; /* value on entry; old length for SG_REALLOC */ \
48 bytes_read = buf->read_line(buffer); \
62 char* ptr_item=NULL; /* start of the item being scanned; NULL while no item is open */ \
63 char* ptr_data=buffer; /* scan cursor into the line */ \
64 DynArray<char*>* items=new DynArray<char*>(); \
68 if ((*ptr_data=='\n') || \
69 (ptr_data - buffer >= bytes_read)) /* newline reached, or cursor past the bytes read */ \
74 append_item(items, ptr_data, ptr_item); \
81 else if (!isblank(*ptr_data) && !ptr_item) /* non-blank character while no item is open */ \
85 else if (isblank(*ptr_data) && ptr_item) /* blank while an item is open: the item ends here */ \
87 append_item(items, ptr_data, ptr_item); \
95 SG_DEBUG("num_feat %d\n", num_feat) \
98 if (old_len < num_feat) /* reallocate only when the old array is too small */ \
99 vector=SG_REALLOC(sg_type, vector, old_len, num_feat); \
101 for (int32_t i=0; i<num_feat; i++) \
103 char* item=items->get_element(i); \
104 vector[i]=conv(item); \
111 GET_VECTOR(get_bool_vector, str_to_bool,
bool)
// GET_FLOAT_VECTOR(sg_type) expands to a CStreamingAsciiFile::get_vector()
// overload for element type sg_type (floating point types, judging by the
// macro name). The generated function reads one line from buf, tokenizes it
// with CCSVFile::tokenize() using m_delimiter, and stores
// SGIO::float_of_substring() of each token in vector.
//   vector - in/out: reallocated with SG_REALLOC from the length passed in
//            to the new len.
//   len    - in/out: its value on entry is the old length; it is overwritten
//            with words.index() (presumably the token count -- TODO confirm).
124 #define GET_FLOAT_VECTOR(sg_type) \
125 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
129 int32_t num_chars = buf->read_line(line); \
130 int32_t old_len = len; /* value on entry; old length for SG_REALLOC */ \
132 if (num_chars == 0) /* read_line returned no characters */ \
139 substring example_string = {line, line + num_chars}; /* the characters just read */ \
141 CCSVFile::tokenize(m_delimiter, example_string, words); \
143 len = words.index(); \
144 substring* feature_start = &words[0]; /* every token is a feature here */ \
147 vector = SG_REALLOC(sg_type, vector, old_len, len); \
150 for (substring* i = feature_start; i != words.end; i++) \
152 vector[j++] = SGIO::float_of_substring(*i); \
159 #undef GET_FLOAT_VECTOR
// GET_VECTOR_AND_LABEL(fname, conv, sg_type) expands to a
// CStreamingAsciiFile::get_vector_and_label() overload for element type
// sg_type. The generated function reads one line from buf and cuts it into
// blank-separated items (each one copied by append_item). Item 0 is parsed
// with atof() into label; items 1..num_feat-1 are converted with conv and
// stored in vector[0..num_feat-2].
//   vector   - in/out: destination array; grown with SG_REALLOC only when the
//              value of num_feat on entry is smaller than num_feat-1.
//   num_feat - in/out: its value on entry is used as the old array length.
//              Presumably set to the number of items (label included) before
//              the conversion loop -- TODO confirm, not shown in this excerpt.
//   label    - out: atof() of the first item.
// NOTE(review): fname is not referenced by any of the lines shown here.
163 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \
164 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
166 char* buffer = NULL; \
167 ssize_t bytes_read; \
168 int32_t old_len = num_feat; /* value on entry; old length for SG_REALLOC */ \
171 bytes_read = buf->read_line(buffer); \
185 char* ptr_item=NULL; /* start of the item being scanned; NULL while no item is open */ \
186 char* ptr_data=buffer; /* scan cursor into the line */ \
187 DynArray<char*>* items=new DynArray<char*>(); \
191 if ((*ptr_data=='\n') || \
192 (ptr_data - buffer >= bytes_read)) /* newline reached, or cursor past the bytes read */ \
197 append_item(items, ptr_data, ptr_item); \
204 else if (!isblank(*ptr_data) && !ptr_item) /* non-blank character while no item is open */ \
208 else if (isblank(*ptr_data) && ptr_item) /* blank while an item is open: the item ends here */ \
210 append_item(items, ptr_data, ptr_item); \
218 SG_DEBUG("num_feat %d\n", num_feat) \
220 label=atof(items->get_element(0)); /* the first item is the label */ \
222 if (old_len < num_feat - 1) /* num_feat-1 features follow the label */ \
223 vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1); \
225 for (int32_t i=1; i<num_feat; i++) \
227 char* item=items->get_element(i); \
228 vector[i-1]=conv(item); /* shift by one: item 0 was the label */ \
247 #undef GET_VECTOR_AND_LABEL
// GET_FLOAT_VECTOR_AND_LABEL(sg_type) expands to a
// CStreamingAsciiFile::get_vector_and_label() overload for element type
// sg_type (floating point types, judging by the macro name). The generated
// function reads one line from buf and tokenizes it with CCSVFile::tokenize()
// using m_delimiter. Token 0 becomes label; the remaining tokens are converted
// with SGIO::float_of_substring() and stored in vector.
//   vector - in/out: reallocated with SG_REALLOC from the length passed in
//            to the new len.
//   len    - in/out: its value on entry is the old length; it is overwritten
//            with words.index() - 1.
//   label  - out: SGIO::float_of_substring() of the first token.
249 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \
250 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
254 int32_t num_chars = buf->read_line(line); \
255 int32_t old_len = len; /* value on entry; old length for SG_REALLOC */ \
257 if (num_chars == 0) /* read_line returned no characters */ \
264 substring example_string = {line, line + num_chars}; /* the characters just read */ \
266 CCSVFile::tokenize(m_delimiter, example_string, words); \
268 label = SGIO::float_of_substring(words[0]); /* the first token is the label */ \
270 len = words.index() - 1; /* one token less: the label is not a feature */ \
271 substring* feature_start = &words[1]; /* features start right after the label */ \
274 vector = SG_REALLOC(sg_type, vector, old_len, len); \
277 for (substring* i = feature_start; i != words.end; i++) \
279 vector[j++] = SGIO::float_of_substring(*i); \
286 #undef GET_FLOAT_VECTOR_AND_LABEL
// GET_STRING(fname, conv, sg_type) expands to a
// CStreamingAsciiFile::get_string() overload for character type sg_type.
// The generated function reads one line from buf, overwrites a trailing
// '\n' with '\0', and makes vector point at the line buffer reinterpreted
// as sg_type*.
//   vector - out: set to the buffer filled by read_line(); no copy is made in
//            the lines shown, so its lifetime is presumably tied to buf --
//            TODO confirm.
//   len    - out: not assigned in the lines shown here -- TODO confirm
//            against the full source.
// NOTE(review): fname and conv are not referenced by any of the lines shown.
290 #define GET_STRING(fname, conv, sg_type) \
291 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \
293 char* buffer = NULL; \
294 ssize_t bytes_read; \
297 bytes_read = buf->read_line(buffer); \
307 SG_DEBUG("Line read from the file:\n%s\n", buffer) \
309 if (buffer[bytes_read-1]=='\n') /* line ends with a newline */ \
312 buffer[bytes_read-1]='\0'; /* replace the newline with a terminator */ \
316 vector=(sg_type *) buffer; /* hand out the line buffer itself */ \
320 GET_STRING(get_bool_string, str_to_bool,
bool)
// GET_STRING_AND_LABEL(fname, conv, sg_type) expands to a
// CStreamingAsciiFile::get_string_and_label() overload for character type
// sg_type. The generated function reads one line from buf, scans it for a
// ' ' character, parses the text at the start of the line as the label, and
// makes vector point into the line buffer at str_start_pos.
//   vector - out: points at &buffer[str_start_pos]; no copy is made in the
//            lines shown, so its lifetime is presumably tied to buf --
//            TODO confirm.
//   len    - out: number of characters from str_start_pos to the end of the
//            line, not counting a trailing '\n'.
//   label  - out: numeric value at the start of the line. It is parsed with
//            atof() rather than atoi() because label is a float64_t and the
//            other *_and_label readers in this file also parse a floating
//            point label; atoi() silently truncated fractional labels.
// NOTE(review): fname and conv are not referenced by any of the lines shown.
337 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \
338 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
340 char* buffer = NULL; \
341 ssize_t bytes_read; \
344 bytes_read = buf->read_line(buffer); \
354 int32_t str_start_pos=-1; /* -1 means: no separator found yet */ \
356 for (int32_t i=0; i<bytes_read; i++) \
358 if (buffer[i] == ' ') /* separator between label and string */ \
361 label=atof(buffer); /* keep fractional labels intact */ \
368 if (str_start_pos == -1) /* no separator was found */ \
375 if (buffer[bytes_read-1]=='\n') /* line ends with a newline */ \
377 buffer[bytes_read-1]='\0'; /* replace the newline with a terminator */ \
378 len=bytes_read-str_start_pos-1; /* exclude the stripped newline */ \
381 len=bytes_read-str_start_pos; \
383 vector=(sg_type*) &buffer[str_start_pos]; \
400 #undef GET_STRING_AND_LABEL
// GET_SPARSE_VECTOR(fname, conv, sg_type) expands to a
// CStreamingAsciiFile::get_sparse_vector() overload for entry type sg_type.
// The generated function reads one line from buf made of "index:value"
// pairs separated by ' ', and fills vector with one SGSparseVectorEntry per
// pair: feat_index is the parsed index minus one, entry is conv() applied to
// the text after the ':'.
//   vector - in/out: destination array; grown with SGREALLOC-style
//            SG_REALLOC only when len on entry is smaller than num_dims.
//   len    - in/out: its value on entry is used as the old array length;
//            the final assignment is not in this excerpt -- TODO confirm.
// NOTE(review): fname is not referenced by any of the lines shown here.
404 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \
405 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
407 char* buffer = NULL; \
408 ssize_t bytes_read; \
411 bytes_read = buf->read_line(buffer); \
423 if (buffer[bytes_read-1]=='\n') /* line ends with a newline */ \
425 num_chars=bytes_read-1; /* do not count the newline */ \
426 buffer[num_chars]='\0'; /* replace the newline with a terminator */ \
429 num_chars=bytes_read; \
431 int32_t num_dims=0; /* presumably counts the ':' characters -- TODO confirm */ \
432 for (int32_t i=0; i<num_chars; i++) \
434 if (buffer[i]==':') \
440 int32_t index_start_pos=-1; /* -1 means: not inside an index right now */ \
441 int32_t feature_start_pos; \
442 int32_t current_feat=0; \
443 if (len < num_dims) /* reallocate only when the old array is too small */ \
444 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
445 for (int32_t i=0; i<num_chars; i++) \
447 if (buffer[i]==':') \
450 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
452 index_start_pos=-1; \
454 feature_start_pos=i+1; /* the value starts right after the ':' */ \
455 while ((i<num_chars) && (buffer[i]!=' ')) /* bounds check first, so buffer[num_chars] is never read */ \
461 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
465 else if (buffer[i]==' ') \
472 if (index_start_pos == -1) \
494 #undef GET_SPARSE_VECTOR
// GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) expands to a
// CStreamingAsciiFile::get_sparse_vector_and_label() overload for entry type
// sg_type. The generated function reads one line from buf that starts with a
// label followed by "index:value" pairs separated by ' '. The label is parsed
// with atof(); the buffer is then advanced past it and one
// SGSparseVectorEntry is filled per pair: feat_index is the parsed index
// minus one, entry is conv() applied to the text after the ':'.
//   vector - in/out: destination array; grown with SG_REALLOC only when len
//            on entry is smaller than num_dims.
//   len    - in/out: its value on entry is used as the old array length;
//            the final assignment is not in this excerpt -- TODO confirm.
//   label  - out: atof() of the start of the line.
// Reports "No label found!" through SG_ERROR (condition not in this excerpt,
// presumably label_pos still being -1 -- TODO confirm).
// NOTE(review): fname is not referenced by any of the lines shown here.
498 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \
499 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
501 char* buffer = NULL; \
502 ssize_t bytes_read; \
505 bytes_read = buf->read_line(buffer); \
517 if (buffer[bytes_read-1]=='\n') /* line ends with a newline */ \
519 num_chars=bytes_read-1; /* do not count the newline */ \
520 buffer[num_chars]='\0'; /* replace the newline with a terminator */ \
523 num_chars=bytes_read; \
525 int32_t num_dims=0; /* presumably counts the ':' characters -- TODO confirm */ \
526 for (int32_t i=0; i<num_chars; i++) \
528 if (buffer[i]==':') \
534 int32_t index_start_pos=-1; /* -1 means: not inside an index right now */ \
535 int32_t feature_start_pos; \
536 int32_t current_feat=0; \
537 int32_t label_pos=-1; /* -1 means: label not located yet */ \
538 if (len < num_dims) /* reallocate only when the old array is too small */ \
539 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
541 for (int32_t i=1; i<num_chars; i++) /* starts at 1 because buffer[i-1] is inspected */ \
543 if (buffer[i]==':') \
547 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) /* first blank that follows a non-blank */ \
551 label=atof(buffer); \
557 SG_ERROR("No label found!\n") \
559 buffer+=label_pos+1; /* skip the label and its separator */ \
560 num_chars-=label_pos+1; \
561 for (int32_t i=0; i<num_chars; i++) \
563 if (buffer[i]==':') \
566 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
568 index_start_pos=-1; \
570 feature_start_pos=i+1; /* the value starts right after the ':' */ \
571 while ((i<num_chars) && (buffer[i]!=' ')) /* bounds check first, so buffer[num_chars] is never read */ \
577 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
581 else if (buffer[i]==' ') \
588 if (index_start_pos == -1) \
610 #undef GET_SPARSE_VECTOR_AND_LABEL
// Copies the characters in [ptr_item, ptr_data) into a freshly allocated,
// zero-terminated string.
//   items    - list the new string is meant for; the append itself is not in
//              this excerpt -- TODO confirm against the full source.
//   ptr_data - one past the last character of the item; must not be NULL.
//   ptr_item - first character of the item; must not be NULL.
// REQUIRE rejects NULL pointers with the message below.
613 void CStreamingAsciiFile::append_item(
614 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
616 REQUIRE(ptr_data && ptr_item,
"Data and Item to append should not be NULL\n");
// Number of characters between the two pointers.
618 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
// One extra byte, zero-filled, so the copy is always terminated.
619 char* item=SG_MALLOC(
char, len+1);
620 memset(item, 0,
sizeof(
char)*(len+1));
621 item=strncpy(item, ptr_item, len);
// len is a size_t; convert it explicitly so the argument matches "%d".
623 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, static_cast<int>(len), item)
629 m_delimiter = delimiter;
#define GET_SPARSE_VECTOR(fname, conv, sg_type)
#define GET_STRING(fname, conv, sg_type)
void set_delimiter(char delimiter)
bool append_element(T element)
#define GET_VECTOR(fname, conv, sg_type)
virtual ~CStreamingAsciiFile()
A Streaming File access class.
#define GET_VECTOR_AND_LABEL(fname, conv, sg_type)
#define GET_FLOAT_VECTOR(sg_type)
Template Dynamic array class that creates an array that can be used like a list or an array...
#define GET_FLOAT_VECTOR_AND_LABEL(sg_type)
#define GET_STRING_AND_LABEL(fname, conv, sg_type)
#define SG_UNSTABLE(func,...)
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)