A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dataframe.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
2 #define GENESIS_UTILS_CONTAINERS_DATAFRAME_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <cassert>
37 #include <cstddef>
38 #include <iterator>
39 #include <stdexcept>
40 #include <string>
41 #include <type_traits>
42 #include <unordered_map>
43 #include <utility>
44 #include <vector>
45 
46 namespace genesis {
47 namespace utils {
48 
49 // =================================================================================================
50 // Data Frame
51 // =================================================================================================
52 
/**
 * @brief Column-major table of values of type `T` with optional row and column names.
 *
 * Each column is stored as a Column object that holds one value per row. Rows and columns can be
 * addressed by index, and additionally by name if they were added with a name. Names are unique
 * within rows and within columns; rows and columns added without a name get an empty name that is
 * not registered for name lookup.
 *
 * Every Column stores a pointer to the Dataframe that owns it. Copying or moving a Dataframe
 * re-points the columns of the target to the target itself.
 */
template <typename T>
class Dataframe
{
public:

    // ---------------------------------------------------------------------------------------------
    //     Column Class
    // ---------------------------------------------------------------------------------------------

    /**
     * @brief One column of a Dataframe: a sequence of values, one per row of the parent.
     *
     * The defaulted copy and move operations transfer the parent pointer and the column index
     * along with the content.
     */
    class Column
    {
    public:

        // -------------------------------------------------------------------------
        //     Member Types
        // -------------------------------------------------------------------------

        using self_type       = Column;
        using value_type      = T;

        using reference       = value_type&;
        using const_reference = const value_type&;
        using pointer         = value_type*;
        using const_pointer   = const value_type*;

        using iterator        = typename std::vector< value_type >::iterator;
        using const_iterator  = typename std::vector< value_type >::const_iterator;

        using size_type       = size_t;

        // -------------------------------------------------------------------------
        //     Constructor and Rule of Five
        // -------------------------------------------------------------------------

        // The Dataframe resizes and clears the content of its columns directly.
        friend class Dataframe;

        /**
         * @brief Create an empty column that belongs to @p df and sits at position @p index.
         */
        Column( Dataframe& df, size_type index )
            : df_( &df )
            , index_( index )
        {}

        ~Column() = default;

        Column( Column const& ) = default;
        Column( Column&& )      = default;

        Column& operator= ( Column const& ) = default;
        Column& operator= ( Column&& )      = default;

        // -------------------------------------------------------------------------
        //     Iterators
        // -------------------------------------------------------------------------

    public:

        /** @brief Iterator to the value of the first row. */
        iterator begin()
        {
            return content_.begin();
        }

        /** @brief Const iterator to the value of the first row. */
        const_iterator begin() const
        {
            return content_.cbegin();
        }

        /** @brief Iterator past the value of the last row. */
        iterator end()
        {
            return content_.end();
        }

        /** @brief Const iterator past the value of the last row. */
        const_iterator end() const
        {
            return content_.cend();
        }

        /** @brief Const iterator to the value of the first row; callable on const columns. */
        const_iterator cbegin() const
        {
            return content_.cbegin();
        }

        /** @brief Const iterator past the value of the last row; callable on const columns. */
        const_iterator cend() const
        {
            return content_.cend();
        }

        // -------------------------------------------------------------------------
        //     Properties
        // -------------------------------------------------------------------------

        /** @brief The Dataframe this column points to as its parent. */
        Dataframe& dataframe()
        {
            return *df_;
        }

        /** @brief The Dataframe this column points to as its parent. */
        Dataframe const& dataframe() const
        {
            return *df_;
        }

        /** @brief Number of values (rows) in this column. */
        size_type size() const
        {
            return content_.size();
        }

        /** @brief Whether this column holds no values. */
        bool empty() const
        {
            return content_.empty();
        }

        /** @brief Position of this column within its parent. */
        size_type index() const
        {
            return index_;
        }

        /**
         * @brief Name of this column, as stored in the parent Dataframe.
         *
         * @throws std::out_of_range if the parent has no column name at this column's index.
         */
        std::string const& name() const
        {
            return df_->col_name( index_ );
        }

        // -------------------------------------------------------------------------
        //     Element Access
        // -------------------------------------------------------------------------

        /** @brief Value at row @p index, without bounds checking. */
        reference operator[] ( size_type index )
        {
            return content_[ index ];
        }

        /** @brief Value at row @p index, without bounds checking. */
        const_reference operator[] ( size_type index ) const
        {
            return content_[ index ];
        }

        /**
         * @brief Value at the row named @p row_name.
         *
         * @throws std::out_of_range if the parent has no row with that name.
         */
        reference operator[] ( std::string const& row_name )
        {
            return content_[ df_->row_index( row_name ) ];
        }

        /**
         * @brief Value at the row named @p row_name.
         *
         * @throws std::out_of_range if the parent has no row with that name.
         */
        const_reference operator[] ( std::string const& row_name ) const
        {
            return content_[ df_->row_index( row_name ) ];
        }

        /**
         * @brief Value at row @p index, with bounds checking.
         *
         * @throws std::out_of_range if @p index is not a valid row index.
         */
        reference at( size_type index )
        {
            return content_.at( index );
        }

        /**
         * @brief Value at row @p index, with bounds checking.
         *
         * @throws std::out_of_range if @p index is not a valid row index.
         */
        const_reference at( size_type index ) const
        {
            return content_.at( index );
        }

        /**
         * @brief Value at the row named @p row_name, with bounds checking.
         *
         * @throws std::out_of_range if the parent has no row with that name, or if the resulting
         * index is not valid for this column.
         */
        reference at( std::string const& row_name )
        {
            // Checked access, matching at( size_type ).
            return content_.at( df_->row_index( row_name ) );
        }

        /**
         * @brief Value at the row named @p row_name, with bounds checking.
         *
         * @throws std::out_of_range if the parent has no row with that name, or if the resulting
         * index is not valid for this column.
         */
        const_reference at( std::string const& row_name ) const
        {
            return content_.at( df_->row_index( row_name ) );
        }

        // -------------------------------------------------------------------------
        //     Modifiers
        // -------------------------------------------------------------------------

        /**
         * @brief Overwrite all values of this column with the values of @p vec.
         *
         * The number of rows never changes here.
         *
         * @throws std::runtime_error if @p vec does not have exactly size() elements.
         */
        self_type& operator = ( std::vector<value_type> const& vec )
        {
            if( vec.size() != content_.size() ) {
                throw std::runtime_error(
                    "Cannot assign vector with different size to Dataframe column."
                );
            }

            content_ = vec;
            return *this;
        }

        // -------------------------------------------------------------------------
        //     Data Members
        // -------------------------------------------------------------------------

    private:

        // Non-owning pointer to the parent; re-pointed by the Dataframe when it is copied or moved.
        Dataframe* df_ = nullptr;
        size_type  index_ = 0;

        std::vector< value_type > content_;

    };

    // ---------------------------------------------------------------------------------------------
    //     Member Types
    // ---------------------------------------------------------------------------------------------

    using self_type       = Dataframe;
    using value_type      = Column;

    using reference       = value_type&;
    using const_reference = const value_type&;
    using pointer         = value_type*;
    using const_pointer   = const value_type*;

    using iterator        = typename std::vector< value_type >::iterator;
    using const_iterator  = typename std::vector< value_type >::const_iterator;

    using size_type       = size_t;

    // ---------------------------------------------------------------------------------------------
    //     Constructor and Rule of Five
    // ---------------------------------------------------------------------------------------------

    friend class Column;

    Dataframe()  = default;
    ~Dataframe() = default;

    /**
     * @brief Copy all columns, names and lookups of @p other.
     *
     * The copied columns are re-pointed to this Dataframe; a member-wise copy would leave them
     * pointing at @p other.
     */
    Dataframe( Dataframe const& other )
        : columns_( other.columns_ )
        , row_names_( other.row_names_ )
        , col_names_( other.col_names_ )
        , row_lookup_( other.row_lookup_ )
        , col_lookup_( other.col_lookup_ )
    {
        rebind_columns_();
    }

    /**
     * @brief Take over the content of @p other, which is left empty.
     *
     * The taken-over columns are re-pointed to this Dataframe.
     */
    Dataframe( Dataframe&& other )
        noexcept( std::is_nothrow_move_constructible<
            std::unordered_map< std::string, size_t >
        >::value )
        : columns_( std::move( other.columns_ ))
        , row_names_( std::move( other.row_names_ ))
        , col_names_( std::move( other.col_names_ ))
        , row_lookup_( std::move( other.row_lookup_ ))
        , col_lookup_( std::move( other.col_lookup_ ))
    {
        rebind_columns_();
        other.clear();
    }

    /**
     * @brief Replace the content with a copy of @p other; columns are re-pointed to this Dataframe.
     */
    Dataframe& operator= ( Dataframe const& other )
    {
        if( this != &other ) {
            columns_    = other.columns_;
            row_names_  = other.row_names_;
            col_names_  = other.col_names_;
            row_lookup_ = other.row_lookup_;
            col_lookup_ = other.col_lookup_;
            rebind_columns_();
        }
        return *this;
    }

    /**
     * @brief Replace the content with that of @p other, which is left empty;
     * columns are re-pointed to this Dataframe.
     */
    Dataframe& operator= ( Dataframe&& other )
        noexcept( std::is_nothrow_move_assignable<
            std::unordered_map< std::string, size_t >
        >::value )
    {
        if( this != &other ) {
            columns_    = std::move( other.columns_ );
            row_names_  = std::move( other.row_names_ );
            col_names_  = std::move( other.col_names_ );
            row_lookup_ = std::move( other.row_lookup_ );
            col_lookup_ = std::move( other.col_lookup_ );
            rebind_columns_();
            other.clear();
        }
        return *this;
    }

    // ---------------------------------------------------------------------------------------------
    //     Iterators
    // ---------------------------------------------------------------------------------------------

    /** @brief Iterator to the first column. */
    iterator begin()
    {
        return columns_.begin();
    }

    /** @brief Const iterator to the first column. */
    const_iterator begin() const
    {
        return columns_.cbegin();
    }

    /** @brief Iterator past the last column. */
    iterator end()
    {
        return columns_.end();
    }

    /** @brief Const iterator past the last column. */
    const_iterator end() const
    {
        return columns_.cend();
    }

    /** @brief Const iterator to the first column; callable on const Dataframes. */
    const_iterator cbegin() const
    {
        return columns_.cbegin();
    }

    /** @brief Const iterator past the last column; callable on const Dataframes. */
    const_iterator cend() const
    {
        return columns_.cend();
    }

    // ---------------------------------------------------------------------------------------------
    //     Properties
    // ---------------------------------------------------------------------------------------------

    /** @brief Number of rows. */
    size_type rows() const
    {
        return row_names_.size();
    }

    /** @brief Number of columns. */
    size_type cols() const
    {
        return columns_.size();
    }

    /** @brief Whether the Dataframe has neither columns nor rows. */
    bool empty() const
    {
        return columns_.empty() && row_names_.empty();
    }

    // ---------------------------------------------------------------------------------------------
    //     Column Access
    // ---------------------------------------------------------------------------------------------

    /**
     * @brief Column at @p column_index.
     *
     * @throws std::out_of_range if @p column_index is not a valid column index.
     */
    reference operator[] ( size_type column_index )
    {
        return columns_.at( column_index );
    }

    /**
     * @brief Column at @p column_index.
     *
     * @throws std::out_of_range if @p column_index is not a valid column index.
     */
    const_reference operator[] ( size_type column_index ) const
    {
        return columns_.at( column_index );
    }

    /**
     * @brief Column named @p col_name.
     *
     * @throws std::out_of_range if there is no column with that name.
     */
    reference operator[] ( std::string const& col_name )
    {
        return columns_[ col_index( col_name ) ];
    }

    /**
     * @brief Column named @p col_name.
     *
     * @throws std::out_of_range if there is no column with that name.
     */
    const_reference operator[] ( std::string const& col_name ) const
    {
        return columns_[ col_index( col_name ) ];
    }

    /**
     * @brief Column at @p column_index.
     *
     * @throws std::out_of_range if @p column_index is not a valid column index.
     */
    reference at( size_type column_index )
    {
        return columns_.at( column_index );
    }

    /**
     * @brief Column at @p column_index.
     *
     * @throws std::out_of_range if @p column_index is not a valid column index.
     */
    const_reference at( size_type column_index ) const
    {
        return columns_.at( column_index );
    }

    /**
     * @brief Column named @p col_name, with bounds checking.
     *
     * @throws std::out_of_range if there is no column with that name.
     */
    reference at( std::string const& col_name )
    {
        // Checked access, matching at( size_type ).
        return columns_.at( col_index( col_name ));
    }

    /**
     * @brief Column named @p col_name, with bounds checking.
     *
     * @throws std::out_of_range if there is no column with that name.
     */
    const_reference at( std::string const& col_name ) const
    {
        return columns_.at( col_index( col_name ));
    }

    // ---------------------------------------------------------------------------------------------
    //     Element Access
    // ---------------------------------------------------------------------------------------------

    // All overloads below take the row first and the column second, and use the checked at()
    // functions, so that an invalid index or unknown name throws std::out_of_range.

    /** @brief Element at row index @p row_index and column index @p column_index. */
    typename Column::reference operator () ( size_type row_index, size_type column_index )
    {
        return at( column_index ).at( row_index );
    }

    /** @brief Element at row index @p row_index and column index @p column_index. */
    typename Column::const_reference operator () ( size_type row_index, size_type column_index ) const
    {
        return at( column_index ).at( row_index );
    }

    /** @brief Element at the row named @p row_name and column index @p column_index. */
    typename Column::reference operator () ( std::string const& row_name, size_type column_index )
    {
        return at( column_index ).at( row_name );
    }

    /** @brief Element at the row named @p row_name and column index @p column_index. */
    typename Column::const_reference operator () ( std::string const& row_name, size_type column_index ) const
    {
        return at( column_index ).at( row_name );
    }

    /** @brief Element at row index @p row_index and the column named @p col_name. */
    typename Column::reference operator () ( size_type row_index, std::string const& col_name )
    {
        return at( col_name ).at( row_index );
    }

    /** @brief Element at row index @p row_index and the column named @p col_name. */
    typename Column::const_reference operator () ( size_type row_index, std::string const& col_name ) const
    {
        return at( col_name ).at( row_index );
    }

    /** @brief Element at the row named @p row_name and the column named @p col_name. */
    typename Column::reference operator () ( std::string const& row_name, std::string const& col_name )
    {
        return at( col_name ).at( row_name );
    }

    /** @brief Element at the row named @p row_name and the column named @p col_name. */
    typename Column::const_reference operator () ( std::string const& row_name, std::string const& col_name ) const
    {
        return at( col_name ).at( row_name );
    }

    // ---------------------------------------------------------------------------------------------
    //     Indexing and Naming
    // ---------------------------------------------------------------------------------------------

    /**
     * @brief Index of the row named @p row_name.
     *
     * @throws std::out_of_range if there is no row with that name.
     */
    size_t row_index( std::string const& row_name ) const
    {
        return row_lookup_.at( row_name );
    }

    /**
     * @brief Name of the row at @p row_index.
     *
     * @throws std::out_of_range if @p row_index is not a valid row index.
     */
    std::string const& row_name( size_type row_index ) const
    {
        return row_names_.at( row_index );
    }

    /**
     * @brief Rename the row at @p row_index to @p value.
     *
     * @throws std::out_of_range if @p row_index is not a valid row index.
     * @throws std::runtime_error if a different row is already registered under @p value.
     */
    self_type& row_name( size_type row_index, std::string const& value )
    {
        auto& current = row_names_.at( row_index );

        // Refuse names that would make two rows share one lookup entry.
        auto const taken = row_lookup_.find( value );
        if( taken != row_lookup_.end() && taken->second != row_index ) {
            throw std::runtime_error( "Row with name " + value + " already exists in Dataframe." );
        }

        // Only drop the old entry if it belongs to this row. An unnamed row has an empty name
        // without a lookup entry, while another row may be registered under the empty name.
        auto const previous = row_lookup_.find( current );
        if( previous != row_lookup_.end() && previous->second == row_index ) {
            row_lookup_.erase( previous );
        }

        row_lookup_[ value ] = row_index;
        current = value;

        return *this;
    }

    /** @brief Names of all rows, in row order. */
    std::vector<std::string> const& row_names() const
    {
        return row_names_;
    }

    /**
     * @brief Index of the column named @p col_name.
     *
     * @throws std::out_of_range if there is no column with that name.
     */
    size_t col_index( std::string const& col_name ) const
    {
        return col_lookup_.at( col_name );
    }

    /**
     * @brief Name of the column at @p col_index.
     *
     * @throws std::out_of_range if @p col_index is not a valid column index.
     */
    std::string const& col_name( size_type col_index ) const
    {
        return col_names_.at( col_index );
    }

    /**
     * @brief Rename the column at @p col_index to @p value.
     *
     * @throws std::out_of_range if @p col_index is not a valid column index.
     * @throws std::runtime_error if a different column is already registered under @p value.
     */
    self_type& col_name( size_type col_index, std::string const& value )
    {
        auto& current = col_names_.at( col_index );

        // Refuse names that would make two columns share one lookup entry.
        auto const taken = col_lookup_.find( value );
        if( taken != col_lookup_.end() && taken->second != col_index ) {
            throw std::runtime_error( "Column with name " + value + " already exists in Dataframe." );
        }

        // Only drop the old entry if it belongs to this column (see the row_name() setter).
        auto const previous = col_lookup_.find( current );
        if( previous != col_lookup_.end() && previous->second == col_index ) {
            col_lookup_.erase( previous );
        }

        col_lookup_[ value ] = col_index;
        current = value;

        return *this;
    }

    /** @brief Names of all columns, in column order. Returns a copy. */
    std::vector<std::string> col_names() const
    {
        return col_names_;
    }

    // ---------------------------------------------------------------------------------------------
    //     Modifiers
    // ---------------------------------------------------------------------------------------------

    /** @brief Remove all columns, rows and names. */
    void clear()
    {
        columns_.clear();
        row_names_.clear();
        col_names_.clear();
        row_lookup_.clear();
        col_lookup_.clear();
    }

    /** @brief Remove all rows and row names; the (now empty) columns and their names are kept. */
    void clear_rows()
    {
        for( auto& col : columns_ ) {
            col.content_.clear();
        }
        row_names_.clear();
        row_lookup_.clear();
    }

    /** @brief Remove all columns and column names; row names are kept. */
    void clear_cols()
    {
        columns_.clear();
        col_names_.clear();
        col_lookup_.clear();
    }

    /**
     * @brief Append a column with an empty name that is not registered for name lookup.
     *
     * The column gets one value-initialized element per existing row.
     */
    self_type& add_col()
    {
        auto const index = columns_.size();
        columns_.emplace_back( *this, index );
        columns_.back().content_.resize( row_names_.size() );
        col_names_.emplace_back();

        return *this;
    }

    /**
     * @brief Append a column named @p name.
     *
     * The column gets one value-initialized element per existing row.
     *
     * @throws std::runtime_error if a column with that name already exists.
     */
    self_type& add_col( std::string const& name )
    {
        if( col_lookup_.count( name ) > 0 ) {
            throw std::runtime_error( "Column with name " + name + " already exists in Dataframe." );
        }

        auto const index = columns_.size();
        columns_.emplace_back( *this, index );
        columns_.back().content_.resize( row_names_.size() );
        col_names_.emplace_back( name );
        col_lookup_[ name ] = index;

        return *this;
    }

    /**
     * @brief Append a row with an empty name that is not registered for name lookup.
     *
     * Every column gets one additional value-initialized element.
     */
    self_type& add_row()
    {
        row_names_.emplace_back();

        for( auto& col : columns_ ) {
            col.content_.emplace_back();
        }

        return *this;
    }

    /**
     * @brief Append a row named @p name.
     *
     * Every column gets one additional value-initialized element.
     *
     * @throws std::runtime_error if a row with that name already exists.
     */
    self_type& add_row( std::string const& name )
    {
        // Add name.
        if( row_lookup_.count( name ) > 0 ) {
            throw std::runtime_error( "Row with name " + name + " already exists in Dataframe." );
        }
        row_names_.emplace_back( name );
        row_lookup_[ name ] = row_names_.size() - 1;

        // Add content.
        for( auto& col : columns_ ) {
            col.content_.emplace_back();
        }

        return *this;
    }

    // ---------------------------------------------------------------------------------------------
    //     Internal Helpers
    // ---------------------------------------------------------------------------------------------

private:

    /** @brief Make every stored column point to this Dataframe as its parent. */
    void rebind_columns_() noexcept
    {
        for( auto& col : columns_ ) {
            col.df_ = this;
        }
    }

    // ---------------------------------------------------------------------------------------------
    //     Data Members
    // ---------------------------------------------------------------------------------------------

private:

    std::vector< value_type > columns_;

    std::vector< std::string > row_names_;
    std::vector< std::string > col_names_;

    // Name to index. Rows and columns that were added without a name have no entry here.
    std::unordered_map< std::string, size_t > row_lookup_;
    std::unordered_map< std::string, size_t > col_lookup_;

};
584 
585 } // namespace utils
586 } // namespace genesis
587 
588 #endif // include guard
const_iterator begin() const
Definition: dataframe.hpp:301
self_type & col_name(size_type col_index, std::string const &value)
Definition: dataframe.hpp:472
reference at(std::string const &row_name)
Definition: dataframe.hpp:212
typename std::vector< value_type >::iterator iterator
Definition: dataframe.hpp:84
Column & operator=(Column const &)=default
const_iterator end() const
Definition: dataframe.hpp:311
size_t col_index(std::string const &col_name) const
Definition: dataframe.hpp:462
typename std::vector< value_type >::const_iterator const_iterator
Definition: dataframe.hpp:85
typename std::vector< value_type >::iterator iterator
Definition: dataframe.hpp:272
const_iterator end() const
Definition: dataframe.hpp:129
const_reference at(size_type index) const
Definition: dataframe.hpp:207
size_t row_index(std::string const &row_name) const
Definition: dataframe.hpp:437
const value_type & const_reference
Definition: dataframe.hpp:80
Column::reference operator()(size_type row_index, size_type column_index)
Definition: dataframe.hpp:393
Provides some valuable additions to STD.
const_reference at(std::string const &col_name) const
Definition: dataframe.hpp:384
reference at(size_type index)
Definition: dataframe.hpp:202
self_type & add_col(std::string const &name)
Definition: dataframe.hpp:526
const_iterator begin() const
Definition: dataframe.hpp:119
std::string const & row_name(size_type row_index) const
Definition: dataframe.hpp:442
Dataframe const & dataframe() const
Definition: dataframe.hpp:153
size_type rows() const
Definition: dataframe.hpp:330
std::vector< std::string > col_names() const
Definition: dataframe.hpp:482
const value_type * const_pointer
Definition: dataframe.hpp:82
Dataframe & operator=(Dataframe const &)=default
reference at(size_type column_index)
Definition: dataframe.hpp:369
reference operator[](size_type index)
Definition: dataframe.hpp:182
Column(Dataframe &df, size_type index)
Definition: dataframe.hpp:95
std::string const & name() const
Definition: dataframe.hpp:173
const_iterator cbegin()
Definition: dataframe.hpp:316
self_type & add_row(std::string const &name)
Definition: dataframe.hpp:552
size_type cols() const
Definition: dataframe.hpp:335
const_reference at(std::string const &row_name) const
Definition: dataframe.hpp:217
std::string const & col_name(size_type col_index) const
Definition: dataframe.hpp:467
const_reference at(size_type column_index) const
Definition: dataframe.hpp:374
const_iterator cend()
Definition: dataframe.hpp:321
reference at(std::string const &col_name)
Definition: dataframe.hpp:379
typename std::vector< value_type >::const_iterator const_iterator
Definition: dataframe.hpp:273
std::vector< std::string > const & row_names() const
Definition: dataframe.hpp:457
self_type & row_name(size_type row_index, std::string const &value)
Definition: dataframe.hpp:447
reference operator[](size_type column_index)
Definition: dataframe.hpp:349