42 #include <unordered_set>
54 return df[col_index].is<
float>() || df[col_index].is<double>()
55 || df[col_index].is<int8_t>() || df[col_index].is<int16_t>()
56 || df[col_index].is<int32_t>() || df[col_index].is<int64_t>()
57 || df[col_index].is<uint8_t>() || df[col_index].is<uint16_t>()
58 || df[col_index].is<uint32_t>() || df[col_index].is<uint64_t>();
64 if( col_index >= df.
cols() ) {
65 throw std::invalid_argument(
"Dataframe column index is out of range." );
69 if( df[col_index].is<std::string>() ) {
70 auto const& df_cast = df[col_index].as<std::string>();
86 if( col_index >= df.
cols() ) {
87 throw std::invalid_argument(
"Dataframe column index is out of range." );
91 if( df[col_index].is<std::string>() ) {
92 auto const& df_cast = df[col_index].as<std::string>();
110 template<
typename S,
typename T>
113 auto const& df_cast = df[col_index].as<S>();
114 std::vector<T> conv_col;
115 conv_col.resize( df_cast.size() ); // resize, not reserve: the loop below assigns via conv_col[i], which requires the elements to exist (size == df_cast.size()), not just capacity
116 for(
size_t i = 0; i < df_cast.size(); ++i ) {
117 conv_col[i] =
static_cast<T
>( df_cast[i] );
129 if( df[col_index].is<float>() ) {
130 convert_to_type_<float, T>( df, col_index );
131 }
else if( df[col_index].is<double>() ) {
132 convert_to_type_<double, T>( df, col_index );
133 }
else if( df[col_index].is<int8_t>() ) {
134 convert_to_type_<int8_t, T>( df, col_index );
135 }
else if( df[col_index].is<int16_t>() ) {
136 convert_to_type_<int16_t, T>( df, col_index );
137 }
else if( df[col_index].is<int32_t>() ) {
138 convert_to_type_<int32_t, T>( df, col_index );
139 }
else if( df[col_index].is<int64_t>() ) {
140 convert_to_type_<int64_t, T>( df, col_index );
141 }
else if( df[col_index].is<uint8_t>() ) {
142 convert_to_type_<uint8_t, T>( df, col_index );
143 }
else if( df[col_index].is<uint16_t>() ) {
144 convert_to_type_<uint16_t, T>( df, col_index );
145 }
else if( df[col_index].is<uint32_t>() ) {
146 convert_to_type_<uint32_t, T>( df, col_index );
147 }
else if( df[col_index].is<uint64_t>() ) {
148 convert_to_type_<uint64_t, T>( df, col_index );
150 throw std::invalid_argument(
151 "Dataframe column is not of a type that be converted to the target type."
158 if( col_index >= df.
cols() ) {
159 throw std::invalid_argument(
"Dataframe column index is out of range." );
161 if( df[col_index].is<std::string>() ) {
164 auto const& df_cast = df[col_index].as<std::string>();
165 auto const bool_col =
convert_to_bool( df_cast.begin(), df_cast.end(), df_cast.size() );
168 auto char_col = std::vector<signed char>( bool_col.size() );
169 for(
size_t i = 0; i < bool_col.size(); ++i ) {
170 char_col[i] = bool_col[i];
173 df.
replace_col<
signed char>( col_index, char_col );
180 throw std::invalid_argument(
181 "Dataframe column conversion to bool is only implemented for strings."
194 if( col_index >= df.
cols() ) {
195 throw std::invalid_argument(
"Dataframe column index is out of range." );
197 if( df[col_index].is<std::string>() ) {
198 auto const& df_cast = df[col_index].as<std::string>();
199 auto const double_col =
convert_to_double( df_cast.begin(), df_cast.end(), df_cast.size() );
202 convert_to_type_<double>( df, col_index );
218 return std::to_string( col_index ) +
": \"" + df[col_index].name() +
"\" " + description +
"\n";
224 auto const& col_cast = df[col_index].as<T>();
231 assert( ip.first <= ip.second );
232 assert( ip.second == df.
rows() );
233 auto const iv = ip.second - ip.first;
245 auto const& col_cast = df[col_index].as<T>();
246 auto const mm = std::minmax_element( col_cast.begin(), col_cast.end() );
257 auto const& str_cast = df[col_index].as<std::string>();
258 std::unordered_set<std::string> uniq( str_cast.begin(), str_cast.end() );
262 "(string, unique elements: " +
std::to_string( uniq.size() ) +
")"
268 if( df[col_index].is<float>() ) {
269 return summarize_column_double_<float>( df, col_index );
270 }
else if( df[col_index].is<double>() ) {
271 return summarize_column_double_<double>( df, col_index );
272 }
else if( df[col_index].is<int8_t>() ) {
273 return summarize_column_int_<int8_t>( df, col_index );
274 }
else if( df[col_index].is<int16_t>() ) {
275 return summarize_column_int_<int16_t>( df, col_index );
276 }
else if( df[col_index].is<int32_t>() ) {
277 return summarize_column_int_<int32_t>( df, col_index );
278 }
else if( df[col_index].is<int64_t>() ) {
279 return summarize_column_int_<int64_t>( df, col_index );
280 }
else if( df[col_index].is<uint8_t>() ) {
281 return summarize_column_int_<uint8_t>( df, col_index );
282 }
else if( df[col_index].is<uint16_t>() ) {
283 return summarize_column_int_<uint16_t>( df, col_index );
284 }
else if( df[col_index].is<uint32_t>() ) {
285 return summarize_column_int_<uint32_t>( df, col_index );
286 }
else if( df[col_index].is<uint64_t>() ) {
287 return summarize_column_int_<uint64_t>( df, col_index );
288 }
else if( df[col_index].is<std::string>() ) {
294 "(unknown data type)"
306 std::string result =
"Data contains " +
std::to_string( df.
rows() ) +
" rows, and the following columns:\n";
307 for(
size_t i = 0; i < df.
cols(); ++i ) {
406 if( df.col_names_.size() != df.columns_.size() ) {
410 for(
size_t i = 0; i < df.columns_.size(); ++i ) {
411 if( df.columns_[i]->size() != df.row_names_.size() ) {
414 if( df.columns_[i]->index() != i ) {
417 if( &df.columns_[i]->dataframe() != &df ) {
422 for(
auto const& rl : df.row_lookup_ ) {
423 if( rl.second >= df.row_names_.size() ) {
426 if( rl.first != df.row_names_[ rl.second ] ) {
431 for(
auto const& cl : df.col_lookup_ ) {
432 if( cl.second >= df.col_names_.size() ) {
435 if( cl.first != df.col_names_[ cl.second ] ) {