// Forward declaration; the class itself is not defined in this listing.
// Per the cross-reference notes further down, CDelimiterTokenizer is used to
// tokenize a SGVector into tokens using custom delimiter chars.
20 class CDelimiterTokenizer;
// Forward declaration of the SGString class template (one type parameter, ST).
// The string-list signatures cross-referenced in this listing use it as
// SGString<uint8_t>.
24 template <
class ST>
class SGString;
// Forward declaration of the SGSparseVector class template (one type
// parameter, T). The sparse-matrix signatures cross-referenced in this listing
// use it as SGSparseVector<bool>.
25 template <
class T>
class SGSparseVector;
/** Constructor taking a C stdio stream handle.
 *
 * @param f stdio stream (FILE*)
 * @param name optional C string, defaults to NULL.
 *        NOTE(review): presumably a name/label for the file -- confirm against
 *        the implementation, which is not part of this listing.
 */
42 CCSVFile(FILE* f,
const char* name=NULL);
/** Constructor taking an integer handle and a mode string.
 *
 * @param fd integer handle.
 *        NOTE(review): presumably an OS file descriptor -- confirm.
 * @param mode C string describing the access mode.
 *        NOTE(review): accepted values are not visible here; presumably
 *        fdopen-style mode strings -- confirm.
 * @param name optional C string, defaults to NULL.
 *        NOTE(review): presumably a name/label for the file -- confirm.
 */
50 CCSVFile(
int fd,
const char* mode,
const char* name=NULL);
/** Constructor taking a file name.
 *
 * @param fname C string naming the file
 * @param rw single-character access selector, defaults to 'r'.
 *        NOTE(review): presumably 'r' means read; other accepted characters
 *        are not visible in this listing -- confirm.
 * @param name optional C string, defaults to NULL.
 *        NOTE(review): presumably a name/label for the file -- confirm.
 */
58 CCSVFile(
const char* fname,
char rw=
'r',
const char* name=NULL);
/** Family of virtual get_vector overloads, one per element type
 * (int8_t, uint8_t, char, int32_t, uint32_t, int16_t, uint16_t, int64_t,
 * uint64_t in this listing).
 *
 * Each overload takes a reference to an element pointer and a reference to an
 * int32_t length, so both can be assigned by the callee.
 * NOTE(review): presumably the callee allocates the buffer and the caller
 * owns it afterwards -- ownership is not visible here, confirm.
 *
 * The embedded original line numbers jump from 99 to 103, so additional
 * overloads may exist that this listing does not show.
 */
95 virtual void get_vector(int8_t*& vector, int32_t& len);
96 virtual void get_vector(uint8_t*& vector, int32_t& len);
97 virtual void get_vector(
char*& vector, int32_t& len);
98 virtual void get_vector(int32_t*& vector, int32_t& len);
99 virtual void get_vector(uint32_t*& vector, int32_t& len);
103 virtual void get_vector(int16_t*& vector, int32_t& len);
104 virtual void get_vector(uint16_t*& vector, int32_t& len);
105 virtual void get_vector(int64_t*& vector, int32_t& len);
106 virtual void get_vector(uint64_t*& vector, int32_t& len);
118 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
120 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
122 char*& matrix, int32_t& num_feat, int32_t& num_vec);
124 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
126 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
128 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
130 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
132 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
134 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
136 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
138 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
140 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
152 uint8_t*& array, int32_t*& dims, int32_t& num_dims);
154 char*& array, int32_t*& dims, int32_t& num_dims);
156 int32_t*& array, int32_t*& dims, int32_t& num_dims);
158 float32_t*& array, int32_t*& dims, int32_t& num_dims);
160 float64_t*& array, int32_t*& dims, int32_t& num_dims);
162 int16_t*& array, int32_t*& dims, int32_t& num_dims);
164 uint16_t*& array, int32_t*& dims, int32_t& num_dims);
213 int32_t& max_string_len);
216 int32_t& max_string_len);
219 int32_t& max_string_len);
222 int32_t& max_string_len);
225 int32_t& max_string_len);
228 int32_t& max_string_len);
231 int32_t& max_string_len);
234 int32_t& max_string_len);
237 int32_t& max_string_len);
240 int32_t& max_string_len);
243 int32_t& max_string_len);
246 int32_t& max_string_len);
/** Family of virtual set_vector overloads, one per element type
 * (int8_t, uint8_t, char, int32_t, uint32_t, int16_t, uint16_t, int64_t,
 * uint64_t in this listing).
 *
 * Each overload takes a pointer to const elements plus an int32_t length, so
 * the input buffer is not modified through this pointer.
 * NOTE(review): presumably writes the len elements to the underlying file --
 * the implementation is not part of this listing, confirm.
 *
 * The embedded original line numbers jump from 263 to 267, so additional
 * overloads may exist that this listing does not show.
 */
259 virtual void set_vector(
const int8_t* vector, int32_t len);
260 virtual void set_vector(
const uint8_t* vector, int32_t len);
261 virtual void set_vector(
const char* vector, int32_t len);
262 virtual void set_vector(
const int32_t* vector, int32_t len);
263 virtual void set_vector(
const uint32_t* vector, int32_t len);
267 virtual void set_vector(
const int16_t* vector, int32_t len);
268 virtual void set_vector(
const uint16_t* vector, int32_t len);
269 virtual void set_vector(
const int64_t* vector, int32_t len);
270 virtual void set_vector(
const uint64_t* vector, int32_t len);
281 const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
283 const int8_t* matrix, int32_t num_feat, int32_t num_vec);
285 const char* matrix, int32_t num_feat, int32_t num_vec);
287 const int32_t* matrix, int32_t num_feat, int32_t num_vec);
289 const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
291 const int64_t* matrix, int32_t num_feat, int32_t num_vec);
293 const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
295 const float32_t* matrix, int32_t num_feat, int32_t num_vec);
297 const float64_t* matrix, int32_t num_feat, int32_t num_vec);
299 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
301 const int16_t* matrix, int32_t num_feat, int32_t num_vec);
303 const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
/** Virtual, const accessor for this object's name.
 *
 * @return the string literal "CSVFile"
 */
385 virtual const char*
get_name()
const {
return "CSVFile"; }
// Helper taking no arguments and returning nothing.
// NOTE(review): presumably assigns default values to the members -- the body
// is not part of this listing, confirm.
392 void init_with_defaults();
// Helper taking a line count (num_lines) and returning nothing.
// NOTE(review): presumably advances past that many lines of the input -- the
// body is not part of this listing, confirm.
395 void skip_lines(int32_t num_lines);
// Boolean member flag.
// NOTE(review): presumably controlled by set_transpose(bool), which appears
// in this listing's cross-references -- confirm.
411 bool is_data_transposed;
// 32-bit signed member holding a count.
// NOTE(review): presumably the number of lines to skip, set through
// set_lines_to_skip(int32_t) from this listing's cross-references -- confirm.
417 int32_t m_num_to_skip;
void set_delimiter(char delimiter)
virtual void get_matrix(uint8_t *&matrix, int32_t &num_feat, int32_t &num_vec)
void set_transpose(bool value)
virtual void set_vector(const int8_t *vector, int32_t len)
Class v_array taken directly from JL's implementation.
virtual void set_matrix(const uint8_t *matrix, int32_t num_feat, int32_t num_vec)
virtual void set_string_list(const SGString< uint8_t > *strings, int32_t num_str)
Class for buffered reading from an ASCII file.
struct Substring, specified by start position and end position.
void set_lines_to_skip(int32_t num_lines)
static void tokenize(char delim, substring s, v_array< substring > &ret)
int32_t get_stats(int32_t &num_tokens)
template class SGSparseVector The assumption is that the stored SGSparseVectorEntry* vector is orde...
Class CSVFile used to read data from comma-separated values (CSV) files. See http://en.wikipedia.org/wiki/Comma-separated_values.
Class for reading from a string.
virtual void get_vector(int8_t *&vector, int32_t &len)
A File access base class.
virtual void get_sparse_matrix(SGSparseVector< bool > *&matrix, int32_t &num_feat, int32_t &num_vec)
The class CDelimiterTokenizer is used to tokenize a SGVector into tokens using custom chars as ...
virtual void set_sparse_matrix(const SGSparseVector< bool > *matrix, int32_t num_feat, int32_t num_vec)
virtual const char * get_name() const
virtual void get_string_list(SGString< uint8_t > *&strings, int32_t &num_str, int32_t &max_string_len)
virtual void get_ndarray(uint8_t *&array, int32_t *&dims, int32_t &num_dims)