1 #ifndef GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
2 #define GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
44 #include <type_traits>
45 #include <unordered_map>
126 assert( df_ && &(df_->
at(index_)) ==
this );
130 std::string
const&
name()
const
154 return dynamic_cast<Column<T>&
>( *this );
160 return dynamic_cast<Column<T> const&
>( *this );
166 auto const c =
dynamic_cast<Column<T> const*
>( this );
167 return ( c !=
nullptr );
173 return as<T>()[
index];
179 return as<T>()[
index];
202 virtual bool empty_()
const = 0;
205 virtual void clear_() = 0;
209 virtual std::unique_ptr<ColumnBase>
clone_()
const = 0;
229 template <
typename T>
230 class Column :
public ColumnBase
247 using iterator =
typename container_type::iterator;
255 ! std::is_same<value_type, bool>::value,
256 "Cannot instanciate Dataframe Column with type bool, "
257 "because std::vector<bool> does not offer proper references."
280 virtual ~Column()
override =
default;
290 return content_.begin();
295 return content_.cbegin();
300 return content_.end();
305 return content_.cend();
310 return content_.cbegin();
315 return content_.cend();
324 return content_[ index ];
329 return content_[ index ];
334 return content_[ dataframe().row_index(
row_name ) ];
339 return content_[ dataframe().row_index(
row_name ) ];
344 return content_.at( index );
349 return content_.at( index );
354 return content_[ dataframe().row_index(
row_name ) ];
359 return content_[ dataframe().row_index(
row_name ) ];
372 self_type& operator = ( std::vector<value_type>
const& vec )
374 if( vec.size() != content_.size() ) {
375 throw std::runtime_error(
376 "Cannot assign vector with different size to Dataframe column."
392 if( vec.size() != content_.size() ) {
393 throw std::runtime_error(
394 "Cannot assign vector with different size to Dataframe column."
398 content_ = std::move( vec );
424 operator std::vector<value_type>
const&()
const
437 return content_.size();
440 bool empty_()
const override
442 return content_.empty();
445 void clear_()
override
452 content_.resize( size );
455 void add_row_()
override
457 content_.emplace_back();
462 content_.erase( content_.begin() +
row_index );
465 std::unique_ptr<ColumnBase> clone_()
const override
473 auto ret = std::unique_ptr< ColumnBase >(
new self_type( *df_, index_ ));
474 dynamic_cast<self_type&
>( *ret ).content_ = content_;
484 std::vector< value_type > content_;
515 : row_names_ ( other.row_names_ )
516 , col_names_ ( other.col_names_ )
517 , row_lookup_( other.row_lookup_ )
518 , col_lookup_( other.col_lookup_ )
521 for(
auto const& col : other.columns_ ) {
522 columns_.emplace_back( col->clone_() );
523 columns_.back()->df_ =
this;
524 columns_.back()->index_ = columns_.size() - 1;
526 assert( columns_.size() > 0 );
527 assert( columns_.back()->size() == row_names_.size() );
536 if( &other ==
this ) {
552 swap( lhs.columns_, rhs.columns_ );
553 swap( lhs.row_names_, rhs.row_names_ );
554 swap( lhs.col_names_, rhs.col_names_ );
555 swap( lhs.row_lookup_, rhs.row_lookup_ );
556 swap( lhs.col_lookup_, rhs.col_lookup_ );
559 for(
size_t i = 0; i < lhs.columns_.size(); ++i ) {
560 lhs.columns_[i]->df_ = &lhs;
562 for(
size_t i = 0; i < rhs.columns_.size(); ++i ) {
563 rhs.columns_[i]->df_ = &rhs;
580 return columns_.begin();
585 return columns_.cbegin();
590 return columns_.end();
595 return columns_.cend();
600 return columns_.cbegin();
605 return columns_.cend();
614 return row_names_.size();
619 assert( columns_.size() == col_names_.size() );
620 return columns_.size();
625 return columns_.empty() && row_names_.empty();
679 throw std::runtime_error(
"Cannot use empty row name." );
681 return ( row_lookup_.count(
row_name ) > 0 );
686 return row_lookup_.size() < row_names_.size();
691 if( row_lookup_.count(
row_name ) == 0 ) {
692 throw std::out_of_range(
"Dataframe has no row with name '" +
row_name +
"'." );
704 auto const& old = row_names_.at(
row_index );
705 row_lookup_.erase( old );
720 throw std::runtime_error(
"Cannot use empty column name." );
722 return ( col_lookup_.count(
col_name ) > 0 );
727 return col_lookup_.size() < col_names_.size();
732 if( col_lookup_.count(
col_name ) == 0 ) {
733 throw std::out_of_range(
"Dataframe has no column with name '" +
col_name +
"'." );
745 auto const& old = col_names_.at(
col_index );
746 col_lookup_.erase( old );
765 auto const index = columns_.size();
767 columns_.back()->resize_( row_names_.size() );
768 col_names_.emplace_back();
770 return columns_.back()->as<T>();
776 auto& col = add_unnamed_col<T>();
777 for(
auto& e : col.content_ ) {
793 if( init.size() != row_names_.size() ) {
794 throw std::invalid_argument(
795 "Cannot add column to Dataframe if initial values vector is of different size."
798 auto& col = add_unnamed_col<T>();
799 assert( col.size() == row_names_.size() );
804 col.content_ = std::move( init );
812 throw std::runtime_error(
813 "Cannot add a column with an empty name. Use add_unnamed_col() instead."
816 if( col_lookup_.count( name ) > 0 ) {
817 throw std::runtime_error(
"Column with name " + name +
" already exists in Dataframe." );
820 auto const index = columns_.size();
823 columns_.back()->resize_( row_names_.size() );
824 col_names_.emplace_back( name );
825 col_lookup_[ name ] = index;
827 return columns_.back()->as<T>();
833 auto& col = add_col<T>( name );
834 for(
auto& e : col.content_ ) {
844 return add_col( name, std::move( copy ));
850 if( init.size() != row_names_.size() ) {
851 throw std::invalid_argument(
852 "Cannot add column to Dataframe if initial values vector is of different size: "
856 auto& col = add_col<T>( name );
857 assert( col.size() == row_names_.size() );
862 col.content_ = std::move( init );
868 row_names_.emplace_back();
870 for(
auto& col : columns_ ) {
871 assert( row_names_.size() == col->size() + 1 );
881 throw std::runtime_error(
882 "Cannot add a row with an empty name. Use add_unnamed_row() instead."
885 if( row_lookup_.count( name ) > 0 ) {
886 throw std::runtime_error(
"Row with name " + name +
" already exists in Dataframe." );
890 row_names_.emplace_back( name );
891 row_lookup_[ name ] = row_names_.size() - 1;
894 for(
auto& col : columns_ ) {
895 assert( row_names_.size() == col->size() + 1 );
909 assert( columns_.size() == col_names_.size() );
910 if( at_index >= columns_.size() ) {
911 throw std::runtime_error(
912 "Invalid column index greater than or equal to number of columns."
916 columns_[ at_index ] = std::unique_ptr<Column<T>>(
new Column<T>( *
this, at_index ));
917 columns_[ at_index ]->resize_( row_names_.size() );
918 return columns_[ at_index ]->as<T>();
924 auto& col = replace_col<T>( at_index );
925 for(
auto& e : col.content_ ) {
941 if( init.size() != row_names_.size() ) {
942 throw std::invalid_argument(
943 "Cannot replace column in Dataframe if initial values vector is of different size."
946 auto& col = replace_col<T>( at_index );
947 assert( col.size() == row_names_.size() );
952 col.content_ = std::move( init );
959 if( at_name.empty() ) {
960 throw std::runtime_error(
"Cannot replace column with empty name given." );
962 if( col_lookup_.count( at_name ) == 0 ) {
963 throw std::runtime_error(
964 "Column with name " + at_name +
" does not exist in Dataframe."
969 assert( col_names_[ index ] == at_name );
970 return replace_col<T>( index );
976 auto& col = replace_col<T>( at_name );
977 for(
auto& e : col.content_ ) {
993 if( init.size() != row_names_.size() ) {
994 throw std::invalid_argument(
995 "Cannot replace column in Dataframe if initial values vector is of different size."
998 auto& col = replace_col<T>( at_name );
999 assert( col.size() == row_names_.size() );
1004 col.content_ = std::move( init );
1017 row_lookup_.clear();
1018 col_lookup_.clear();
1024 for(
auto& col : columns_ ) {
1028 row_lookup_.clear();
1036 col_lookup_.clear();
1042 assert( columns_.size() == col_names_.size() );
1044 throw std::runtime_error(
1045 "Invalid column index greater than or equal to number of columns."
1050 auto const name = col_names_[
col_index ];
1051 columns_.erase( columns_.begin() +
col_index );
1052 col_names_.erase( col_names_.begin() +
col_index );
1053 col_lookup_.erase( name );
1056 for(
size_t i =
col_index; i < columns_.size(); ++i ) {
1057 --columns_[i]->index_;
1058 assert( columns_[i]->index() == i );
1062 for(
auto& le : col_lookup_ ) {
1075 assert( col_names_[ index ] ==
col_name );
1083 throw std::runtime_error(
"Invalid row index greater than or equal to number of rows." );
1087 for(
auto& col : columns_ ) {
1088 assert( col->size() == row_names_.size() );
1092 auto const name = row_names_[
row_index ];
1093 row_names_.erase( row_names_.begin() +
row_index );
1094 row_lookup_.erase( name );
1097 for(
auto& le : row_lookup_ ) {
1110 assert( row_names_[ index ] ==
row_name );
1123 std::vector< std::string > row_names_;
1124 std::vector< std::string > col_names_;
1126 std::unordered_map< std::string, size_t > row_lookup_;
1127 std::unordered_map< std::string, size_t > col_lookup_;
1135 static_assert( std::is_move_constructible<Dataframe>::value,
"Dataframe is not move constructible." );
1136 static_assert( std::is_move_assignable<Dataframe>::value,
"Dataframe is not move assignable." );
1141 #endif // include guard