27 #include <boost/regex.hpp> 29 using boost::regex_match;
30 #include <boost/algorithm/string.hpp> 31 #include <boost/lexical_cast.hpp> 32 #include <boost/tokenizer.hpp> 48 regex column_separator {
"\\s+"};
53 size_t comment_pos = line.
find(comment);
54 if (comment_pos != std::string::npos) {
55 line = line.
substr(0, comment_pos);
59 boost::sregex_token_iterator i (line.
begin(), line.
end(), column_separator, -1);
60 boost::sregex_token_iterator j;
75 if (keyword ==
"bool" || keyword ==
"boolean") {
77 }
else if (keyword ==
"int" || keyword ==
"int32") {
78 return typeid(int32_t);
79 }
else if (keyword ==
"long" || keyword ==
"int64") {
80 return typeid(int64_t);
81 }
else if (keyword ==
"float") {
83 }
else if (keyword ==
"double") {
84 return typeid(double);
85 }
else if (keyword ==
"string") {
87 }
else if (keyword ==
"[bool]" || keyword ==
"[boolean]") {
89 }
else if (keyword ==
"[int]" || keyword ==
"[int32]") {
91 }
else if (keyword ==
"[long]" || keyword ==
"[int64]") {
93 }
else if (keyword ==
"[float]") {
95 }
else if (keyword ==
"[double]") {
97 }
else if (keyword ==
"[bool+]" || keyword ==
"[boolean+]") {
99 }
else if (keyword ==
"[int+]" || keyword ==
"[int32+]") {
101 }
else if (keyword ==
"[long+]" || keyword ==
"[int64+]") {
103 }
else if (keyword ==
"[float+]") {
105 }
else if (keyword ==
"[double+]") {
122 if (boost::starts_with(line, comment)) {
125 boost::replace_all(line, comment,
"");
127 if (boost::starts_with(line,
"Column:")) {
131 boost::sregex_token_iterator token (line.
begin(), line.
end(), regex{
"\\s+"}, -1);
132 boost::sregex_token_iterator end;
134 if (descriptions.
count(name) != 0) {
141 if (!boost::starts_with(token_str,
"(") && token_str !=
"-") {
149 if (boost::starts_with(token_str,
"(")) {
156 if (token != end && *token ==
"-") {
160 while (token != end) {
161 desc << *token <<
' ';
165 boost::trim(desc_str);
166 descriptions.
emplace(std::piecewise_construct,
180 size_t columns_number) {
195 if (boost::starts_with(line, comment)) {
198 boost::replace_all(line, comment,
"");
203 if (boost::starts_with(line,
"Column:")) {
207 auto space_i = temp.
find(
' ');
209 temp = temp.
substr(0, space_i);
211 desc_names.emplace_back(
std::move(temp));
219 if (!last_comment.empty()){
220 boost::sregex_token_iterator i (last_comment.begin(), last_comment.end(), regex{
"\\s+"}, -1);
221 boost::sregex_token_iterator j;
226 if (names.size() != columns_number) {
233 if (desc_names.size() != 0 && desc_names.size() != columns_number) {
234 logger.
warn() <<
"Number of column descriptions does not matches the number" 235 <<
" of the columns";
240 if (names.size() < columns_number) {
241 for (
size_t i=names.size()+1; i<=columns_number; ++i) {
247 for (
auto name : names) {
248 if (!set.insert(name).second) {
257 template <
typename T>
260 boost::char_separator<char> sep {
","};
261 boost::tokenizer< boost::char_separator<char> > tok {str, sep};
262 for (
auto& s : tok) {
268 template <
typename T>
269 NdArray<T> convertStringToNdArray(
const std::string& str) {
272 }
else if (str[0] !=
'<') {
276 auto closing_char = str.
find(
'>');
277 if (closing_char == std::string::npos) {
281 auto shape_str = str.
substr(1, closing_char - 1);
282 auto shape_i = convertStringToVector<int32_t>(shape_str);
283 auto data = convertStringToVector<T>(str.
substr(closing_char + 1));
287 return NdArray<T>(shape_u, data);
294 if (type ==
typeid(
bool)) {
295 if (value ==
"true" || value ==
"t" || value ==
"yes" || value ==
"y" || value ==
"1") {
298 if (value ==
"false" || value ==
"f" || value ==
"no" || value ==
"n" || value ==
"0") {
301 }
else if (type ==
typeid(int32_t)) {
303 }
else if (type ==
typeid(int64_t)) {
305 }
else if (type ==
typeid(
float)) {
307 }
else if (type ==
typeid(
double)) {
332 }
catch( boost::bad_lexical_cast
const& ) {
343 size_t comment_pos = line.
find(comment);
344 if (comment_pos != std::string::npos) {
345 line = line.
substr(0, comment_pos);
361 size_t comment_pos = line.
find(comment);
362 if (comment_pos != std::string::npos) {
363 line = line.
substr(0, comment_pos);
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
T forward_as_tuple(T... args)
boost::variant< bool, int32_t, int64_t, float, double, std::string, std::vector< bool >, std::vector< int32_t >, std::vector< int64_t >, std::vector< float >, std::vector< double >, NdArray::NdArray< bool >, NdArray::NdArray< int32_t >, NdArray::NdArray< int64_t >, NdArray::NdArray< float >, NdArray::NdArray< double > > cell_type
The possible cell types.
static Elements::Logging logger
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
std::type_index keywordToType(const std::string &keyword)
NdArray(const std::vector< size_t > &shape)
void warn(const std::string &logMessage)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
This class gets a stream as argument during construction and when it is deleted it sets the position ...
bool hasNextRow(std::istream &in, const std::string &comment)
T back_inserter(T... args)
static Logging getLogger(const std::string &name="")
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace-separated tokens of the first non-commented line.
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.