A toolkit for working with phylogenetic data.
v0.24.0
containers/dataframe.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
2 #define GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <cassert>
38 #include <cstddef>
39 #include <cstdint>
40 #include <iterator>
41 #include <memory>
42 #include <stdexcept>
43 #include <string>
44 #include <type_traits>
45 #include <unordered_map>
46 #include <utility>
47 #include <vector>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // Dataframe
54 // =================================================================================================
55 
59 class Dataframe
60 {
61 public:
62 
63  // ---------------------------------------------------------------------------------------------
64  // Forward Declarations
65  // ---------------------------------------------------------------------------------------------
66 
67  template <typename T>
68  class Column;
69 
70  // ---------------------------------------------------------------------------------------------
71  // Column Base
72  // ---------------------------------------------------------------------------------------------
73 
74  class ColumnBase
75  {
76  public:
77 
78  // -------------------------------------------------------------------------
79  // Member Types
80  // -------------------------------------------------------------------------
81 
82  using size_type = size_t;
83 
84  friend class Dataframe;
85  friend void swap( Dataframe& lhs, Dataframe& rhs );
86 
87  // -------------------------------------------------------------------------
88  // Constructor and Rule of Five
89  // -------------------------------------------------------------------------
90 
91  protected:
92 
93  ColumnBase() = delete;
94 
96  : df_( &df )
97  , index_( index )
98  {}
99 
100  ColumnBase( ColumnBase const& ) = delete;
101  ColumnBase( ColumnBase&& ) = delete;
102 
103  ColumnBase& operator= ( ColumnBase const& ) = delete;
104  ColumnBase& operator= ( ColumnBase&& ) = delete;
105 
106  public:
107 
108  virtual ~ColumnBase() = default;
109 
110  // -------------------------------------------------------------------------
111  // Properties
112  // -------------------------------------------------------------------------
113 
115  {
116  return *df_;
117  }
118 
119  Dataframe const& dataframe() const
120  {
121  return *df_;
122  }
123 
124  size_type index() const
125  {
126  assert( df_ && &(df_->at(index_)) == this );
127  return index_;
128  }
129 
130  std::string const& name() const
131  {
132  return df_->col_name( index_ );
133  }
134 
135  size_type size() const
136  {
137  // Non-virtual interface
138  return size_();
139  }
140 
141  bool empty() const
142  {
143  // Non-virtual interface
144  return empty_();
145  }
146 
147  // -------------------------------------------------------------------------
148  // Casting and Types
149  // -------------------------------------------------------------------------
150 
151  template<typename T>
153  {
154  return dynamic_cast<Column<T>&>( *this );
155  }
156 
157  template<typename T>
158  Column<T> const& as() const
159  {
160  return dynamic_cast<Column<T> const&>( *this );
161  }
162 
163  template<typename T>
164  bool is() const
165  {
166  auto const c = dynamic_cast<Column<T> const*>( this );
167  return ( c != nullptr );
168  }
169 
170  template<typename T>
171  T& get( size_type index )
172  {
173  return as<T>()[index];
174  }
175 
176  template<typename T>
177  T const& get( size_type index ) const
178  {
179  return as<T>()[index];
180  }
181 
182  template<typename T>
183  T& get( std::string const& row_name )
184  {
185  return as<T>()[ dataframe().row_index( row_name ) ];
186  }
187 
188  template<typename T>
189  T const& get( std::string const& row_name ) const
190  {
191  return as<T>()[ dataframe().row_index( row_name ) ];
192  }
193 
194  // -------------------------------------------------------------------------
195  // Purely Virtual Functions
196  // -------------------------------------------------------------------------
197 
198  protected:
199 
200  // Non-virtual interface for public functions.
201  virtual size_type size_() const = 0;
202  virtual bool empty_() const = 0;
203 
204  // Private functions for internal use only.
205  virtual void clear_() = 0;
206  virtual void resize_( size_type ) = 0;
207  virtual void add_row_() = 0;
208  virtual void remove_row_( size_type row_index ) = 0;
209  virtual std::unique_ptr<ColumnBase> clone_() const = 0;
210 
211  // -------------------------------------------------------------------------
212  // Data Members
213  // -------------------------------------------------------------------------
214 
215  private:
216 
217  Dataframe* df_;
218  size_type index_;
219 
220  };
221 
222  // ---------------------------------------------------------------------------------------------
223  // Column Class
224  // ---------------------------------------------------------------------------------------------
225 
229  template <typename T>
230  class Column : public ColumnBase
231  {
232  public:
233 
234  // -------------------------------------------------------------------------
235  // Member Types
236  // -------------------------------------------------------------------------
237 
239  using value_type = T;
240  using container_type = std::vector< value_type >;
241 
243  using const_reference = value_type const&;
244  using pointer = value_type*;
245  using const_pointer = value_type const*;
246 
247  using iterator = typename container_type::iterator;
248  using const_iterator = typename container_type::const_iterator;
249 
250  using size_type = size_t;
251 
252  friend class Dataframe;
253 
254  static_assert(
255  ! std::is_same<value_type, bool>::value,
256  "Cannot instanciate Dataframe Column with type bool, "
257  "because std::vector<bool> does not offer proper references."
258  );
259 
260  // -------------------------------------------------------------------------
261  // Constructor and Rule of Five
262  // -------------------------------------------------------------------------
263 
264  private:
265 
266  Column() = delete;
267 
269  : ColumnBase( df, index )
270  {}
271 
272  Column( Column const& ) = delete;
273  Column( Column&& ) = delete;
274 
275  Column& operator= ( Column const& other ) = delete;
276  Column& operator= ( Column&& other ) = delete;
277 
278  public:
279 
280  virtual ~Column() override = default;
281 
282  // -------------------------------------------------------------------------
283  // Iterators
284  // -------------------------------------------------------------------------
285 
286  public:
287 
289  {
290  return content_.begin();
291  }
292 
294  {
295  return content_.cbegin();
296  }
297 
299  {
300  return content_.end();
301  }
302 
304  {
305  return content_.cend();
306  }
307 
309  {
310  return content_.cbegin();
311  }
312 
314  {
315  return content_.cend();
316  }
317 
318  // -------------------------------------------------------------------------
319  // Element Access
320  // -------------------------------------------------------------------------
321 
323  {
324  return content_[ index ];
325  }
326 
328  {
329  return content_[ index ];
330  }
331 
332  reference operator[] ( std::string const& row_name )
333  {
334  return content_[ dataframe().row_index( row_name ) ];
335  }
336 
337  const_reference operator[] ( std::string const& row_name ) const
338  {
339  return content_[ dataframe().row_index( row_name ) ];
340  }
341 
343  {
344  return content_.at( index );
345  }
346 
348  {
349  return content_.at( index );
350  }
351 
352  reference at( std::string const& row_name )
353  {
354  return content_[ dataframe().row_index( row_name ) ];
355  }
356 
357  const_reference at( std::string const& row_name ) const
358  {
359  return content_[ dataframe().row_index( row_name ) ];
360  }
361 
362  // -------------------------------------------------------------------------
363  // Modifiers
364  // -------------------------------------------------------------------------
365 
372  self_type& operator = ( std::vector<value_type> const& vec )
373  {
374  if( vec.size() != content_.size() ) {
375  throw std::runtime_error(
376  "Cannot assign vector with different size to Dataframe column."
377  );
378  }
379 
380  content_ = vec;
381  return *this;
382  }
383 
390  self_type& operator = ( std::vector<value_type>&& vec )
391  {
392  if( vec.size() != content_.size() ) {
393  throw std::runtime_error(
394  "Cannot assign vector with different size to Dataframe column."
395  );
396  }
397 
398  content_ = std::move( vec );
399  return *this;
400  }
401 
402  // -------------------------------------------------------------------------
403  // Interaction Operators
404  // -------------------------------------------------------------------------
405 
414  std::vector<value_type> const& to_vector() const
415  {
416  return content_;
417  }
418 
424  operator std::vector<value_type> const&() const
425  {
426  return content_;
427  }
428 
429  // -------------------------------------------------------------------------
430  // Internal Members
431  // -------------------------------------------------------------------------
432 
433  private:
434 
435  size_type size_() const override
436  {
437  return content_.size();
438  }
439 
440  bool empty_() const override
441  {
442  return content_.empty();
443  }
444 
445  void clear_() override
446  {
447  content_.clear();
448  }
449 
450  void resize_( size_type size ) override
451  {
452  content_.resize( size );
453  }
454 
455  void add_row_() override
456  {
457  content_.emplace_back();
458  }
459 
460  void remove_row_( size_type row_index ) override
461  {
462  content_.erase( content_.begin() + row_index );
463  }
464 
465  std::unique_ptr<ColumnBase> clone_() const override
466  {
467  // Nicer version, doesn't work in gcc 4.9 though...
468  // auto ret = std::unique_ptr< self_type >( new self_type( *df_, index_ ));
469  // ret->content_ = content_;
470  // return ret;
471 
472  // Seem to need this for older compilers...
473  auto ret = std::unique_ptr< ColumnBase >( new self_type( *df_, index_ ));
474  dynamic_cast<self_type&>( *ret ).content_ = content_;
475  return ret;
476  }
477 
478  // -------------------------------------------------------------------------
479  // Data Members
480  // -------------------------------------------------------------------------
481 
482  private:
483 
484  std::vector< value_type > content_;
485  };
486 
487  // ---------------------------------------------------------------------------------------------
488  // Member Types
489  // ---------------------------------------------------------------------------------------------
490 
491 public:
492 
495  using container_type = std::vector< std::unique_ptr< value_type >>;
496 
498  using const_reference = value_type const&;
499  using pointer = value_type*;
500  using const_pointer = value_type const*;
501 
504 
505  using size_type = size_t;
506 
507  // ---------------------------------------------------------------------------------------------
508  // Constructor and Rule of Five
509  // ---------------------------------------------------------------------------------------------
510 
511  Dataframe() = default;
512  ~Dataframe() = default;
513 
514  Dataframe( Dataframe const& other )
515  : row_names_ ( other.row_names_ )
516  , col_names_ ( other.col_names_ )
517  , row_lookup_( other.row_lookup_ )
518  , col_lookup_( other.col_lookup_ )
519  {
520  columns_.clear();
521  for( auto const& col : other.columns_ ) {
522  columns_.emplace_back( col->clone_() );
523  columns_.back()->df_ = this;
524  columns_.back()->index_ = columns_.size() - 1;
525 
526  assert( columns_.size() > 0 );
527  assert( columns_.back()->size() == row_names_.size() );
528  }
529  }
530 
531  Dataframe& operator= ( Dataframe const& other )
532  {
533  using std::swap;
534 
535  // Check for self assignment. Just in case.
536  if( &other == this ) {
537  return *this;
538  }
539 
540  // Copy-swap-idiom.
541  self_type temp( other );
542  swap( *this, temp );
543  return *this;
544  }
545 
546  Dataframe( Dataframe&& ) = default;
547  Dataframe& operator= ( Dataframe&& ) = default;
548 
549  friend void swap( self_type& lhs, self_type& rhs )
550  {
551  using std::swap;
552  swap( lhs.columns_, rhs.columns_ );
553  swap( lhs.row_names_, rhs.row_names_ );
554  swap( lhs.col_names_, rhs.col_names_ );
555  swap( lhs.row_lookup_, rhs.row_lookup_ );
556  swap( lhs.col_lookup_, rhs.col_lookup_ );
557 
558  // Need to swap dataframe pointers of the columns as well!
559  for( size_t i = 0; i < lhs.columns_.size(); ++i ) {
560  lhs.columns_[i]->df_ = &lhs;
561  }
562  for( size_t i = 0; i < rhs.columns_.size(); ++i ) {
563  rhs.columns_[i]->df_ = &rhs;
564  }
565  }
566 
572  friend bool validate( Dataframe const& );
573 
574  // ---------------------------------------------------------------------------------------------
575  // Iterators
576  // ---------------------------------------------------------------------------------------------
577 
579  {
580  return columns_.begin();
581  }
582 
584  {
585  return columns_.cbegin();
586  }
587 
589  {
590  return columns_.end();
591  }
592 
594  {
595  return columns_.cend();
596  }
597 
599  {
600  return columns_.cbegin();
601  }
602 
604  {
605  return columns_.cend();
606  }
607 
608  // ---------------------------------------------------------------------------------------------
609  // Properties
610  // ---------------------------------------------------------------------------------------------
611 
612  size_type rows() const
613  {
614  return row_names_.size();
615  }
616 
617  size_type cols() const
618  {
619  assert( columns_.size() == col_names_.size() );
620  return columns_.size();
621  }
622 
623  bool empty() const
624  {
625  return columns_.empty() && row_names_.empty();
626  }
627 
628  // ---------------------------------------------------------------------------------------------
629  // Column Access
630  // ---------------------------------------------------------------------------------------------
631 
633  {
634  return *columns_.at( col_index );
635  }
636 
638  {
639  return *columns_.at( col_index );
640  }
641 
642  reference operator[] ( std::string const& col_name )
643  {
644  return *columns_[ col_index( col_name ) ];
645  }
646 
647  const_reference operator[] ( std::string const& col_name ) const
648  {
649  return *columns_[ col_index( col_name ) ];
650  }
651 
653  {
654  return *columns_.at( col_index );
655  }
656 
658  {
659  return *columns_.at( col_index );
660  }
661 
662  reference at( std::string const& col_name )
663  {
664  return *columns_[ col_index( col_name ) ];
665  }
666 
667  const_reference at( std::string const& col_name ) const
668  {
669  return *columns_[ col_index( col_name ) ];
670  }
671 
672  // ---------------------------------------------------------------------------------------------
673  // Indexing and Naming
674  // ---------------------------------------------------------------------------------------------
675 
676  bool has_row_name( std::string const& row_name ) const
677  {
678  if( row_name.empty() ) {
679  throw std::runtime_error( "Cannot use empty row name." );
680  }
681  return ( row_lookup_.count( row_name ) > 0 );
682  }
683 
684  bool has_unnamed_rows() const
685  {
686  return row_lookup_.size() < row_names_.size();
687  }
688 
689  size_t row_index( std::string const& row_name ) const
690  {
691  if( row_lookup_.count( row_name ) == 0 ) {
692  throw std::out_of_range( "Dataframe has no row with name '" + row_name + "'." );
693  }
694  return row_lookup_.at( row_name );
695  }
696 
697  std::string const& row_name( size_type row_index ) const
698  {
699  return row_names_.at( row_index );
700  }
701 
702  self_type& row_name( size_type row_index, std::string const& value )
703  {
704  auto const& old = row_names_.at( row_index );
705  row_lookup_.erase( old );
706  row_lookup_[ value ] = row_index;
707  row_names_.at( row_index ) = value;
708 
709  return *this;
710  }
711 
712  std::vector<std::string> const& row_names() const
713  {
714  return row_names_;
715  }
716 
717  bool has_col_name( std::string const& col_name ) const
718  {
719  if( col_name.empty() ) {
720  throw std::runtime_error( "Cannot use empty column name." );
721  }
722  return ( col_lookup_.count( col_name ) > 0 );
723  }
724 
725  bool has_unnamed_cols() const
726  {
727  return col_lookup_.size() < col_names_.size();
728  }
729 
730  size_t col_index( std::string const& col_name ) const
731  {
732  if( col_lookup_.count( col_name ) == 0 ) {
733  throw std::out_of_range( "Dataframe has no column with name '" + col_name + "'." );
734  }
735  return col_lookup_.at( col_name );
736  }
737 
738  std::string const& col_name( size_type col_index ) const
739  {
740  return col_names_.at( col_index );
741  }
742 
743  self_type& col_name( size_type col_index, std::string const& value )
744  {
745  auto const& old = col_names_.at( col_index );
746  col_lookup_.erase( old );
747  col_lookup_[ value ] = col_index;
748  col_names_.at( col_index ) = value;
749 
750  return *this;
751  }
752 
753  std::vector<std::string> const& col_names() const
754  {
755  return col_names_;
756  }
757 
758  // ---------------------------------------------------------------------------------------------
759  // Adding rows and cols
760  // ---------------------------------------------------------------------------------------------
761 
762  template<class T>
764  {
765  auto const index = columns_.size();
766  columns_.emplace_back( std::unique_ptr<Column<T>>( new Column<T>( *this, index )));
767  columns_.back()->resize_( row_names_.size() );
768  col_names_.emplace_back();
769 
770  return columns_.back()->as<T>();
771  }
772 
773  template<class T>
774  Column<T>& add_unnamed_col( T const& init )
775  {
776  auto& col = add_unnamed_col<T>();
777  for( auto& e : col.content_ ) {
778  e = init;
779  }
780  return col;
781  }
782 
783  template<class T>
784  Column<T>& add_unnamed_col( std::vector<T> const& init )
785  {
786  auto copy = init;
787  return add_unnamed_col( std::move( copy ));
788  }
789 
790  template<class T>
791  Column<T>& add_unnamed_col( std::vector<T>&& init )
792  {
793  if( init.size() != row_names_.size() ) {
794  throw std::invalid_argument(
795  "Cannot add column to Dataframe if initial values vector is of different size."
796  );
797  }
798  auto& col = add_unnamed_col<T>();
799  assert( col.size() == row_names_.size() );
800 
801  // for( size_t i = 0; i < init.size(); ++i ) {
802  // col.content_[i] = init[i];
803  // }
804  col.content_ = std::move( init );
805  return col;
806  }
807 
808  template<class T>
809  Column<T>& add_col( std::string const& name )
810  {
811  if( name.empty() ) {
812  throw std::runtime_error(
813  "Cannot add a column with an empty name. Use add_unnamed_col() instead."
814  );
815  }
816  if( col_lookup_.count( name ) > 0 ) {
817  throw std::runtime_error( "Column with name " + name + " already exists in Dataframe." );
818  }
819 
820  auto const index = columns_.size();
821  // columns_.emplace_back( *this, index );
822  columns_.emplace_back( std::unique_ptr<Column<T>>( new Column<T>( *this, index )));
823  columns_.back()->resize_( row_names_.size() );
824  col_names_.emplace_back( name );
825  col_lookup_[ name ] = index;
826 
827  return columns_.back()->as<T>();
828  }
829 
830  template<class T>
831  Column<T>& add_col( std::string const& name, T const& init )
832  {
833  auto& col = add_col<T>( name );
834  for( auto& e : col.content_ ) {
835  e = init;
836  }
837  return col;
838  }
839 
840  template<class T>
841  Column<T>& add_col( std::string const& name, std::vector<T> const& init )
842  {
843  auto copy = init;
844  return add_col( name, std::move( copy ));
845  }
846 
847  template<class T>
848  Column<T>& add_col( std::string const& name, std::vector<T>&& init )
849  {
850  if( init.size() != row_names_.size() ) {
851  throw std::invalid_argument(
852  "Cannot add column to Dataframe if initial values vector is of different size: "
853  + std::to_string( init.size() ) + " != " + std::to_string( row_names_.size() )
854  );
855  }
856  auto& col = add_col<T>( name );
857  assert( col.size() == row_names_.size() );
858 
859  // for( size_t i = 0; i < init.size(); ++i ) {
860  // col.content_[i] = init[i];
861  // }
862  col.content_ = std::move( init );
863  return col;
864  }
865 
867  {
868  row_names_.emplace_back();
869 
870  for( auto& col : columns_ ) {
871  assert( row_names_.size() == col->size() + 1 );
872  col->add_row_();
873  }
874 
875  return *this;
876  }
877 
878  self_type& add_row( std::string const& name )
879  {
880  if( name.empty() ) {
881  throw std::runtime_error(
882  "Cannot add a row with an empty name. Use add_unnamed_row() instead."
883  );
884  }
885  if( row_lookup_.count( name ) > 0 ) {
886  throw std::runtime_error( "Row with name " + name + " already exists in Dataframe." );
887  }
888 
889  // Add name.
890  row_names_.emplace_back( name );
891  row_lookup_[ name ] = row_names_.size() - 1;
892 
893  // Add content.
894  for( auto& col : columns_ ) {
895  assert( row_names_.size() == col->size() + 1 );
896  col->add_row_();
897  }
898 
899  return *this;
900  }
901 
902  // ---------------------------------------------------------------------------------------------
903  // Replacing cols
904  // ---------------------------------------------------------------------------------------------
905 
906  template<class T>
908  {
909  assert( columns_.size() == col_names_.size() );
910  if( at_index >= columns_.size() ) {
911  throw std::runtime_error(
912  "Invalid column index greater than or equal to number of columns."
913  );
914  }
915 
916  columns_[ at_index ] = std::unique_ptr<Column<T>>( new Column<T>( *this, at_index ));
917  columns_[ at_index ]->resize_( row_names_.size() );
918  return columns_[ at_index ]->as<T>();
919  }
920 
921  template<class T>
922  Column<T>& replace_col( size_type at_index, T const& init )
923  {
924  auto& col = replace_col<T>( at_index );
925  for( auto& e : col.content_ ) {
926  e = init;
927  }
928  return col;
929  }
930 
931  template<class T>
932  Column<T>& replace_col( size_type at_index, std::vector<T> const& init )
933  {
934  auto copy = init;
935  return replace_col( at_index, std::move( copy ));
936  }
937 
938  template<class T>
939  Column<T>& replace_col( size_type at_index, std::vector<T>&& init )
940  {
941  if( init.size() != row_names_.size() ) {
942  throw std::invalid_argument(
943  "Cannot replace column in Dataframe if initial values vector is of different size."
944  );
945  }
946  auto& col = replace_col<T>( at_index );
947  assert( col.size() == row_names_.size() );
948 
949  // for( size_t i = 0; i < init.size(); ++i ) {
950  // col.content_[i] = init[i];
951  // }
952  col.content_ = std::move( init );
953  return col;
954  }
955 
956  template<class T>
957  Column<T>& replace_col( std::string const& at_name )
958  {
959  if( at_name.empty() ) {
960  throw std::runtime_error( "Cannot replace column with empty name given." );
961  }
962  if( col_lookup_.count( at_name ) == 0 ) {
963  throw std::runtime_error(
964  "Column with name " + at_name + " does not exist in Dataframe."
965  );
966  }
967 
968  auto const index = col_index( at_name );
969  assert( col_names_[ index ] == at_name );
970  return replace_col<T>( index );
971  }
972 
973  template<class T>
974  Column<T>& replace_col( std::string const& at_name, T const& init )
975  {
976  auto& col = replace_col<T>( at_name );
977  for( auto& e : col.content_ ) {
978  e = init;
979  }
980  return col;
981  }
982 
983  template<class T>
984  Column<T>& replace_col( std::string const& at_name, std::vector<T> const& init )
985  {
986  auto copy = init;
987  return replace_col( at_name, std::move( copy ));
988  }
989 
990  template<class T>
991  Column<T>& replace_col( std::string const& at_name, std::vector<T>&& init )
992  {
993  if( init.size() != row_names_.size() ) {
994  throw std::invalid_argument(
995  "Cannot replace column in Dataframe if initial values vector is of different size."
996  );
997  }
998  auto& col = replace_col<T>( at_name );
999  assert( col.size() == row_names_.size() );
1000 
1001  // for( size_t i = 0; i < init.size(); ++i ) {
1002  // col.content_[i] = init[i];
1003  // }
1004  col.content_ = std::move( init );
1005  return col;
1006  }
1007 
1008  // ---------------------------------------------------------------------------------------------
1009  // Removing rows and cols
1010  // ---------------------------------------------------------------------------------------------
1011 
1013  {
1014  columns_.clear();
1015  row_names_.clear();
1016  col_names_.clear();
1017  row_lookup_.clear();
1018  col_lookup_.clear();
1019  return *this;
1020  }
1021 
1023  {
1024  for( auto& col : columns_ ) {
1025  col->clear_();
1026  }
1027  row_names_.clear();
1028  row_lookup_.clear();
1029  return *this;
1030  }
1031 
1033  {
1034  columns_.clear();
1035  col_names_.clear();
1036  col_lookup_.clear();
1037  return *this;
1038  }
1039 
1041  {
1042  assert( columns_.size() == col_names_.size() );
1043  if( col_index >= columns_.size() ) {
1044  throw std::runtime_error(
1045  "Invalid column index greater than or equal to number of columns."
1046  );
1047  }
1048 
1049  // Remove elements.
1050  auto const name = col_names_[ col_index ];
1051  columns_.erase( columns_.begin() + col_index );
1052  col_names_.erase( col_names_.begin() + col_index );
1053  col_lookup_.erase( name );
1054 
1055  // Adjust remaining indices.
1056  for( size_t i = col_index; i < columns_.size(); ++i ) {
1057  --columns_[i]->index_;
1058  assert( columns_[i]->index() == i );
1059  }
1060 
1061  // Adjust indices of all lookup table values that are greater than the removed index.
1062  for( auto& le : col_lookup_ ) {
1063  assert( le.second != col_index );
1064  if( le.second > col_index ) {
1065  --le.second;
1066  }
1067  }
1068 
1069  return *this;
1070  }
1071 
1072  self_type& remove_col( std::string const& col_name )
1073  {
1074  auto const index = col_index( col_name );
1075  assert( col_names_[ index ] == col_name );
1076  remove_col( index );
1077  return *this;
1078  }
1079 
1081  {
1082  if( row_index >= row_names_.size() ) {
1083  throw std::runtime_error( "Invalid row index greater than or equal to number of rows." );
1084  }
1085 
1086  // Remove elements.
1087  for( auto& col : columns_ ) {
1088  assert( col->size() == row_names_.size() );
1089  // col.content_.erase( col.content_.begin() + row_index );
1090  col->remove_row_( row_index );
1091  }
1092  auto const name = row_names_[ row_index ];
1093  row_names_.erase( row_names_.begin() + row_index );
1094  row_lookup_.erase( name );
1095 
1096  // Adjust remaining indices.
1097  for( auto& le : row_lookup_ ) {
1098  assert( le.second != row_index );
1099  if( le.second > row_index ) {
1100  --le.second;
1101  }
1102  }
1103 
1104  return *this;
1105  }
1106 
1107  self_type& remove_row( std::string const& row_name )
1108  {
1109  auto const index = row_index( row_name );
1110  assert( row_names_[ index ] == row_name );
1111  remove_row( index );
1112  return *this;
1113  }
1114 
1115  // ---------------------------------------------------------------------------------------------
1116  // Data Members
1117  // ---------------------------------------------------------------------------------------------
1118 
1119 private:
1120 
1121  container_type columns_;
1122 
1123  std::vector< std::string > row_names_;
1124  std::vector< std::string > col_names_;
1125 
1126  std::unordered_map< std::string, size_t > row_lookup_;
1127  std::unordered_map< std::string, size_t > col_lookup_;
1128 
1129 };
1130 
1131 // =================================================================================================
1132 // Dataframe Assertions
1133 // =================================================================================================
1134 
1135 static_assert( std::is_move_constructible<Dataframe>::value, "Dataframe is not move constructible." );
1136 static_assert( std::is_move_assignable<Dataframe>::value, "Dataframe is not move assignable." );
1137 
1138 } // namespace utils
1139 } // namespace genesis
1140 
1141 #endif // include guard
Column< T > & replace_col(std::string const &at_name)
std::string const & row_name(size_type row_index) const
self_type & remove_col(size_type col_index)
Column< T > & add_unnamed_col(std::vector< T > &&init)
self_type & remove_row(size_type row_index)
friend bool validate(Dataframe const &)
Helper function to validate internal invariants.
reference at(size_type col_index)
self_type & col_name(size_type col_index, std::string const &value)
reference at(std::string const &row_name)
ColumnBase & operator=(ColumnBase const &)=delete
std::vector< value_type > const & to_vector() const
Explicit conversion to std::vector.
Iterator class that exposes elements in a container of pointers.
virtual std::unique_ptr< ColumnBase > clone_() const =0
Column< T > & add_unnamed_col(std::vector< T > const &init)
void swap(SequenceSet &lhs, SequenceSet &rhs)
reference operator[](size_type col_index)
Column< T > & add_col(std::string const &name)
const_reference at(std::string const &col_name) const
Column< T > & replace_col(size_type at_index, std::vector< T > &&init)
const_iterator begin() const
virtual void remove_row_(size_type row_index)=0
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
typename container_type::const_iterator const_iterator
friend void swap(self_type &lhs, self_type &rhs)
Column< T > & add_col(std::string const &name, T const &init)
std::string const & col_name(size_type col_index) const
Provides some valuable additions to STD.
Column< T > & replace_col(size_type at_index, std::vector< T > const &init)
virtual size_type size_() const =0
typename container_type::iterator iterator
Column< T > & replace_col(size_type at_index)
Column< T > & replace_col(std::string const &at_name, std::vector< T > const &init)
virtual void resize_(size_type)=0
size_t col_index(std::string const &col_name) const
const_reference at(std::string const &row_name) const
virtual bool empty_() const =0
Column< T > & add_col(std::string const &name, std::vector< T > const &init)
self_type & remove_row(std::string const &row_name)
std::vector< std::string > const & col_names() const
size_t row_index(std::string const &row_name) const
std::vector< std::unique_ptr< value_type > > container_type
ColumnBase(Dataframe &df, size_type index)
Column< T > & add_unnamed_col(T const &init)
Column< T > & replace_col(size_type at_index, T const &init)
const_reference at(size_type index) const
self_type & add_row(std::string const &name)
bool has_col_name(std::string const &col_name) const
std::vector< std::string > const & row_names() const
Dataframe(Dataframe const &other)
friend void swap(Dataframe &lhs, Dataframe &rhs)
const_reference at(size_type col_index) const
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
Column< T > & replace_col(std::string const &at_name, std::vector< T > &&init)
bool has_row_name(std::string const &row_name) const
reference at(std::string const &col_name)
self_type & remove_col(std::string const &col_name)
Column< T > & replace_col(std::string const &at_name, T const &init)
std::vector< value_type > container_type
self_type & row_name(size_type row_index, std::string const &value)
Column< T > & add_col(std::string const &name, std::vector< T > &&init)