|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_DATAFRAME_READER_H_
2 #define GENESIS_UTILS_CONTAINERS_DATAFRAME_READER_H_
47 #include <type_traits>
57 template<
typename T = std::
string>
88 std::shared_ptr<BaseInputSource> source
100 return col_names_from_first_row_;
105 return row_names_from_first_col_;
110 col_names_from_first_row_ = value;
116 row_names_from_first_col_ = value;
132 return trim_whitespace_;
137 trim_whitespace_ = value;
143 parse_value_ = functor;
157 size_t const offset = ( row_names_from_first_col_ ? 1 : 0 );
161 if( ! input_stream ) {
166 if( col_names_from_first_row_ ) {
167 auto const col_names = reader_.
parse_line( input_stream );
170 size_t const start =
offset;
171 for(
size_t i = start; i < col_names.size(); ++i ) {
172 result.
add_col<T>( col_names[i] );
177 while( input_stream ) {
178 auto const line = reader_.
parse_line( input_stream );
182 if(( line.size() == 0 ) || ( row_names_from_first_col_ && line.size() == 1 )) {
183 throw std::runtime_error(
184 "Cannot read Dataframe with lines that do not contain any content (line " +
185 std::to_string( line_cnt ) +
"). Maybe the separator char is wrong."
188 assert( line.size() >
offset );
191 if( row_names_from_first_col_ ) {
198 if( result.
cols() == 0 ) {
200 assert( result.
rows() == 1 );
201 assert( ! col_names_from_first_row_ );
204 for(
size_t i =
offset; i < line.size(); ++i ) {
207 assert( line.size() ==
offset + result.
cols() );
212 throw std::runtime_error(
213 "Dataframe input has different line lengths (line " +
219 auto const row_idx = result.
rows() - 1;
221 for(
size_t i = 0; i < result.
cols(); ++i ) {
222 auto& col =
dynamic_cast<Dataframe::Column<T>&
>(result[i]);
223 col[row_idx] = parse_value_(
232 for(
size_t i = 0; i < result.
cols(); ++i ) {
233 auto& col =
dynamic_cast<Dataframe::Column<T>&
>(result[i]);
246 col[row_idx] = convert_from_string<T>(
250 throw std::runtime_error(
253 "Cannot parse value \"" + line[
offset + i ] +
"\" into Dataframe. "
254 "Either the input data does not represent values of the specified data "
255 "type, or the input data table contains whitespace around the fields. "
256 "If the latter, allow to trim the respective whitespace chars by "
257 "setting the CsvReader::trim_chars() option accordingly."
269 assert( result.
rows() == line_cnt - ( col_names_from_first_row_ ? 1 : 0 ));
279 bool col_names_from_first_row_ =
true;
280 bool row_names_from_first_col_ =
true;
281 bool trim_whitespace_ =
false;
285 std::function<T( std::string
const& )> parse_value_;
292 #endif // include guard
DataframeReader & trim_whitespace(bool value)
std::vector< std::string > parse_line(utils::InputStream &input_stream) const
Parse one line of the CSV data and return it.
CsvReader & separator_chars(std::string const &chars)
Set the chars used to separate fields of the CSV data.
CsvReader const & csv_reader() const
bool col_names_from_first_row() const
bool trim_whitespace() const
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with white space (or any other given delimiters) trimmed.
void offset(Histogram &h, double value)
Provides some valuable additions to STD.
DataframeReader(CsvReader const &reader)
self_type & add_row(std::string const &name)
std::string to_string(GenomeLocus const &locus)
DataframeReader & operator=(DataframeReader const &)=default
Provides some commonly used string utility functions.
DataframeReader & parse_value_functor(std::function< T(std::string const &)> functor)
DataframeReader(char separator_char=',')
~DataframeReader()=default
DataframeReader & row_names_from_first_col(bool value)
Column< T > & add_unnamed_col()
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
DataframeReader & col_names_from_first_row(bool value)
bool row_names_from_first_col() const
Read Comma/Character Separated Values (CSV) data and other delimiter-separated formats.
self_type & add_unnamed_row()
Dataframe read(std::shared_ptr< BaseInputSource > source) const
Column< T > & add_col(std::string const &name)