1 #ifndef GENESIS_UTILS_CONTAINERS_DATAFRAME_READER_H_ 2 #define GENESIS_UTILS_CONTAINERS_DATAFRAME_READER_H_ 47 #include <type_traits> 57 template<
typename T = std::
string>
88 std::shared_ptr<BaseInputSource> source
100 return col_names_from_first_row_;
105 return row_names_from_first_col_;
110 col_names_from_first_row_ = value;
116 row_names_from_first_col_ = value;
132 return trim_whitespace_;
137 trim_whitespace_ = value;
143 parse_value_ = functor;
157 size_t const offset = ( row_names_from_first_col_ ? 1 : 0 );
161 if( ! input_stream ) {
166 if( col_names_from_first_row_ ) {
167 auto const col_names = reader_.
parse_line( input_stream );
170 size_t const start =
offset;
171 for(
size_t i = start; i < col_names.size(); ++i ) {
172 result.
add_col<T>( col_names[i] );
177 while( input_stream ) {
178 auto const line = reader_.
parse_line( input_stream );
182 if(( line.size() == 0 ) || ( row_names_from_first_col_ && line.size() == 1 )) {
183 throw std::runtime_error(
184 "Cannot read Dataframe with lines that do not contain any content (line " +
185 std::to_string( line_cnt ) +
"). Maybe the separator char is wrong." 188 assert( line.size() >
offset );
191 if( row_names_from_first_col_ ) {
198 if( result.
cols() == 0 ) {
200 assert( result.
rows() == 1 );
201 assert( ! col_names_from_first_row_ );
204 for(
size_t i = offset; i < line.size(); ++i ) {
207 assert( line.size() == offset + result.
cols() );
211 if( line.size() != offset + result.
cols() ) {
212 throw std::runtime_error(
213 "Dataframe input has different line lengths (line " +
219 auto const row_idx = result.
rows() - 1;
221 for(
size_t i = 0; i < result.
cols(); ++i ) {
223 col[row_idx] = parse_value_(
224 trim_whitespace_ ?
trim(line[ offset + i ]) : line[ offset + i ]
232 for(
size_t i = 0; i < result.
cols(); ++i ) {
246 col[row_idx] = convert_from_string<T>(
247 trim_whitespace_ ?
trim(line[ offset + i ]) : line[ offset + i ]
250 throw std::runtime_error(
251 "Cannot parse value \"" + line[ offset + i ] +
"\" into Dataframe. " 252 "Either the input data does not represent values of the specified data " 253 "type, or the input data table contains whitespace around the fields. " 254 "If the latter, allow to trim the respective whitespace chars by " 255 "setting the CsvReader::trim_chars() option accordingly." 267 assert( result.
rows() == line_cnt - ( col_names_from_first_row_ ? 1 : 0 ));
277 bool col_names_from_first_row_ =
true;
278 bool row_names_from_first_col_ =
true;
279 bool trim_whitespace_ =
false;
283 std::function<T( std::string const& )> parse_value_;
290 #endif // include guard void offset(Histogram &h, double value)
DataframeReader & parse_value_functor(std::function< T(std::string const &)> functor)
Column< T > & add_unnamed_col()
Column< T > & add_col(std::string const &name)
CsvReader & separator_chars(std::string const &chars)
Set the chars used to separate fields of the CSV data.
DataframeReader(char separator_char=',')
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Provides some valuable additions to STD.
CsvReader const & csv_reader() const
bool row_names_from_first_col() const
bool col_names_from_first_row() const
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
DataframeReader & row_names_from_first_col(bool value)
Provides some commonly used string utility functions.
std::vector< std::string > parse_line(utils::InputStream &input_stream) const
Parse one line of the CSV data and return it.
bool trim_whitespace() const
self_type & add_row(std::string const &name)
DataframeReader & operator=(DataframeReader const &)=default
Read Comma/Character Separated Values (CSV) data and other delimiter-separated formats.
~DataframeReader()=default
Dataframe read(std::shared_ptr< BaseInputSource > source) const
self_type & add_unnamed_row()
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
DataframeReader(CsvReader const &reader)
DataframeReader & trim_whitespace(bool value)
DataframeReader & col_names_from_first_row(bool value)