A toolkit for working with phylogenetic data.
v0.24.0
utils/containers/matrix/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
2 #define GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
39 
40 #include <functional>
41 #include <stdexcept>
42 #include <sstream>
43 #include <string>
44 #include <vector>
45 
46 namespace genesis {
47 namespace utils {
48 
49 // =================================================================================================
50 // MatrixReader
51 // =================================================================================================
52 
53 template <typename T>
55 {
56 public:
57 
58  // -------------------------------------------------------------
59  // Constructors and Rule of Five
60  // -------------------------------------------------------------
61 
62  explicit MatrixReader( char separator_char = '\t' )
63  {
64  reader_.separator_chars( std::string( 1, separator_char ));
65  }
66 
67  explicit MatrixReader( CsvReader const& reader )
68  : reader_( reader )
69  {}
70 
71  ~MatrixReader() = default;
72 
73  MatrixReader(MatrixReader const&) = default;
74  MatrixReader(MatrixReader&&) = default;
75 
76  MatrixReader& operator= (MatrixReader const&) = default;
77  MatrixReader& operator= (MatrixReader&&) = default;
78 
79  // -------------------------------------------------------------
80  // Reading
81  // -------------------------------------------------------------
82 
83  Matrix<T> read( std::shared_ptr<BaseInputSource> source ) const
84  {
85  utils::InputStream is( source );
86  return parse_( is );
87  }
88 
89  // -------------------------------------------------------------
90  // Properties
91  // -------------------------------------------------------------
92 
93  bool skip_first_col() const
94  {
95  return skip_first_col_;
96  }
97 
98  bool skip_first_row() const
99  {
100  return skip_first_row_;
101  }
102 
104  {
105  skip_first_col_ = value;
106  return *this;
107  }
108 
110  {
111  skip_first_row_ = value;
112  return *this;
113  }
114 
116  {
117  return reader_;
118  }
119 
120  CsvReader const& csv_reader() const
121  {
122  return reader_;
123  }
124 
125  MatrixReader& parse_value_functor( std::function<T( std::string const& )> functor )
126  {
127  parse_value_ = functor;
128  }
129 
130  // -------------------------------------------------------------
131  // Internal Functions
132  // -------------------------------------------------------------
133 
134 private:
135 
136  Matrix<T> parse_( utils::InputStream& input_stream ) const
137  {
138  // We collect data in a vector first, because resizing a Matrix is hard.
139  std::vector<T> table;
140  size_t cols = 0;
141 
142  // Early stop.
143  if( ! input_stream ) {
144  return {};
145  }
146 
147  // Skip first line if needed.
148  if( skip_first_row_ ) {
149  reader_.parse_line( input_stream );
150  }
151 
152  while( input_stream ) {
153  auto const line = reader_.parse_line( input_stream );
154 
155  // Get the measurements of the interesting part of the line.
156  auto first = 0;
157  auto len = line.size();
158  if( len > 0 && skip_first_col_ ) {
159  first = 1;
160  --len;
161  }
162 
163  // Check that line length is consisent. Cols == 0 means we just started.
164  if( cols == 0 ) {
165 
166  // Matrix with zero length colums is empty, no matter how many rows it has.
167  if( len == 0 ) {
168  return {};
169  // throw std::runtime_error( "Cannot read Matrix with empty lines." );
170  }
171 
172  // Store the col length.
173  cols = len;
174 
175  } else if( cols != len ) {
176  throw std::runtime_error( "Matrix has different line lengths." );
177  }
178 
179  // Parse and transfer the data. User specified parser or default one.
180  if( parse_value_ ) {
181  for( size_t i = first; i < line.size(); ++i ) {
182  table.push_back( parse_value_( line[i] ) );
183  }
184  } else {
185  for( size_t i = first; i < line.size(); ++i ) {
186  table.push_back( parse_value_stringstream_( line[i] ) );
187  }
188  }
189  }
190 
191  // We cannot properly calculate dimensions of an empty matrix. So better return here.
192  if( table.size() == 0 ) {
193  assert( cols == 0 );
194  return {};
195  }
196 
197  // Make sure that the table as a matrix shape.
198  if( table.size() % cols != 0 ) {
199  // I'm pretty sure this should be an assertion. But better check it all the time,
200  // and throw some confusing incomprehensible error message.
201  throw std::runtime_error( "Matrix is not rectangluar." );
202  }
203 
204  // Make a proper Matrix.
205  size_t const rows = table.size() / cols;
206  return Matrix<T>( rows, cols, std::move(table) );
207  }
208 
209  inline T parse_value_stringstream_( std::string const& cell ) const
210  {
211  std::stringstream ss( cell );
212  T value;
213  ss >> value;
214  return value;
215  }
216 
217  // -------------------------------------------------------------
218  // Data Members
219  // -------------------------------------------------------------
220 
221 private:
222 
223  bool skip_first_row_ = false;
224  bool skip_first_col_ = false;
225 
226  CsvReader reader_;
227 
228  std::function<T( std::string const& )> parse_value_;
229 
230 };
231 
232 } // namespace utils
233 } // namespace genesis
234 
235 #endif // include guard
Matrix< T > read(std::shared_ptr< BaseInputSource > source) const
MatrixReader & parse_value_functor(std::function< T(std::string const &)> functor)
MatrixReader & operator=(MatrixReader const &)=default
CsvReader & separator_chars(std::string const &chars)
Set the chars used to separate fields of the CSV data.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Provides some valuable additions to STD.
std::vector< std::string > parse_line(utils::InputStream &input_stream) const
Parse one line of the CSV data and return it.
Read Comma/Character Separated Values (CSV) data and other delimiter-separated formats.
Stream interface for reading data from an InputSource, that keeps track of line and column counters...