A library for working with phylogenetic and population genetic data.
v0.32.0
utils/containers/matrix/simple_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_MATRIX_SIMPLE_READER_H_
2 #define GENESIS_UTILS_CONTAINERS_MATRIX_SIMPLE_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2023 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
43 
44 #include <cassert>
45 #include <cstdlib>
46 #include <functional>
47 #include <sstream>
48 #include <stdexcept>
49 #include <string>
50 #include <vector>
51 
52 namespace genesis {
53 namespace utils {
54 
55 // =================================================================================================
56 // MatrixSimpleReader
57 // =================================================================================================
58 
59 template <typename T>
61 {
62 public:
63 
64  // -------------------------------------------------------------
65  // Constructors and Rule of Five
66  // -------------------------------------------------------------
67 
68  explicit MatrixSimpleReader( char separator_char = '\t' )
69  : separator_char_( separator_char )
70  {}
71  ~MatrixSimpleReader() = default;
72 
73  MatrixSimpleReader(MatrixSimpleReader const&) = default;
75 
78 
79  // -------------------------------------------------------------
80  // Reading
81  // -------------------------------------------------------------
82 
83  Matrix<T> read( std::shared_ptr<BaseInputSource> source ) const
84  {
85  utils::InputStream is( source );
86  return parse_( is );
87  }
88 
89  // -------------------------------------------------------------
90  // Properties
91  // -------------------------------------------------------------
92 
93  char separator_char() const
94  {
95  return separator_char_;
96  }
97 
98  bool skip_first_col() const
99  {
100  return skip_first_col_;
101  }
102 
103  bool skip_first_row() const
104  {
105  return skip_first_row_;
106  }
107 
109  {
110  separator_char_ = value;
111  return *this;
112  }
113 
115  {
116  skip_first_col_ = value;
117  return *this;
118  }
119 
121  {
122  skip_first_row_ = value;
123  return *this;
124  }
125 
133  {
134  parse_value_ = functor;
135  return *this;
136  }
137 
138  MatrixSimpleReader& convert_value_functor( std::function<T( std::string const& )> functor )
139  {
140  convert_value_ = functor;
141  return *this;
142  }
143 
144  // -------------------------------------------------------------
145  // Internal Functions
146  // -------------------------------------------------------------
147 
148 private:
149 
150  Matrix<T> parse_( utils::InputStream& input_stream ) const
151  {
152  auto& it = input_stream;
153 
154  // We collect data in a vector first, because resizing a Matrix is hard.
155  std::vector<T> table;
156  size_t cols = 0;
157 
158  // Early stop.
159  if( ! it ) {
160  return {};
161  }
162 
163  // Skip first line if needed.
164  if( skip_first_row_ ) {
165  it.get_line();
166  }
167 
168  // Read the whole input
169  while( it ) {
170 
171  // Skip first column of needed.
172  if( skip_first_col_ ) {
173  while( it && *it != separator_char_ && *it != '\n' ) {
174  ++it;
175  }
176  assert( !it || *it == separator_char_ || *it == '\n' );
177  ++it;
178  }
179 
180  // Read the rest of the line into the table.
181  auto const line_length = parse_line_( it, table );
182 
183  // Check that line length is consisent. Cols == 0 means we just started.
184  if( cols == 0 ) {
185  // Edge case, no columns found at all.
186  // A Matrix with zero length colums is empty, no matter how many rows it has.
187  if( line_length == 0 ) {
188  return {};
189  }
190 
191  // Store the col length.
192  cols = line_length;
193  } else if( cols != line_length ) {
194  throw std::runtime_error(
195  "In " + it.source_name() + " at " + it.at() + ": " +
196  "Different line lengths. Stareted with " + std::to_string( cols ) +
197  ", now found " + std::to_string( line_length )
198  );
199  }
200  }
201 
202  // We cannot properly calculate dimensions of an empty matrix. So better return here.
203  if( table.size() == 0 ) {
204  assert( cols == 0 );
205  return {};
206  }
207 
208  // Make sure that the table as a matrix shape.
209  if( table.size() % cols != 0 ) {
210  // I'm pretty sure this could be an assertion. But better check it all the time,
211  // and throw some confusing incomprehensible error message.
212  throw std::runtime_error( "Matrix is not rectangluar." );
213  }
214 
215  // Make a proper Matrix.
216  size_t const rows = table.size() / cols;
217  return Matrix<T>( rows, cols, std::move(table) );
218  }
219 
220  size_t parse_line_( utils::InputStream& input_stream, std::vector<T>& table ) const
221  {
222  auto& it = input_stream;
223  size_t cnt = 0;
224  while( it && *it != '\n' ) {
225  // Parse the next field.
226  table.push_back( parse_field_( it ));
227  ++cnt;
228 
229  // Check that everything is in order.
230  if( it && *it != separator_char_ && *it != '\n' ) {
231  throw std::runtime_error(
232  "In " + it.source_name() + " at " + it.at() + ": " +
233  "Unexpected char " + char_to_hex( *it )
234  );
235  }
236  assert( !it || *it == separator_char_ || *it == '\n' );
237  if( it && *it == separator_char_ ) {
238  ++it;
239  }
240  }
241  assert( !it || *it == '\n' );
242  ++it;
243  return cnt;
244  }
245 
246  T parse_field_( utils::InputStream& input_stream ) const
247  {
248  T result;
249  auto& it = input_stream;
250  if( parse_value_ ) {
251  // If we are given a complete parser function by the user, use that.
252  result = parse_value_( it );
253  } else {
254  // Otherwise, we read to string first...
255  std::string value;
256  while( it && *it != separator_char_ && *it != '\n' ) {
257  value += *it;
258  ++it;
259  }
260 
261  // ... and then convert to the value type we want.
262  if( convert_value_ ) {
263  // If we are given a conversion function by the user, use that.
264  result = convert_value_( value );
265  } else {
266  // Otherwise, use a generic one, that utilizes stream input.
267  // Expensive, but works.
268  try {
269  result = convert_from_string<T>( value );
270  } catch(...) {
271  throw std::runtime_error(
272  "In " + it.source_name() + " at " + it.at() + ": " +
273  "Invalid value \"" + value + "\""
274  );
275  }
276  }
277  }
278  return result;
279  }
280 
281  // -------------------------------------------------------------
282  // Data Members
283  // -------------------------------------------------------------
284 
285 private:
286 
287  char separator_char_ = '\t';
288  bool skip_first_row_ = false;
289  bool skip_first_col_ = false;
290 
291  std::function<T( utils::InputStream& )> parse_value_;
292  std::function<T( std::string const& )> convert_value_;
293 
294 };
295 
296 } // namespace utils
297 } // namespace genesis
298 
299 #endif // include guard
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
parser.hpp
genesis::utils::MatrixSimpleReader::skip_first_col
bool skip_first_col() const
Definition: utils/containers/matrix/simple_reader.hpp:98
genesis::utils::MatrixSimpleReader::read
Matrix< T > read(std::shared_ptr< BaseInputSource > source) const
Definition: utils/containers/matrix/simple_reader.hpp:83
genesis::utils::MatrixSimpleReader::MatrixSimpleReader
MatrixSimpleReader(char separator_char='\t')
Definition: utils/containers/matrix/simple_reader.hpp:68
genesis::utils::MatrixSimpleReader::separator_char
MatrixSimpleReader & separator_char(char value)
Definition: utils/containers/matrix/simple_reader.hpp:108
genesis::utils::MatrixSimpleReader::skip_first_row
MatrixSimpleReader & skip_first_row(bool value)
Definition: utils/containers/matrix/simple_reader.hpp:120
std.hpp
Provides some valuable additions to STD.
genesis::utils::MatrixSimpleReader::separator_char
char separator_char() const
Definition: utils/containers/matrix/simple_reader.hpp:93
input_source.hpp
genesis::utils::Matrix
Definition: placement/function/emd.hpp:53
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
string.hpp
Provides some commonly used string utility functions.
input_stream.hpp
genesis::utils::MatrixSimpleReader
Definition: utils/containers/matrix/simple_reader.hpp:60
genesis::utils::MatrixSimpleReader::~MatrixSimpleReader
~MatrixSimpleReader()=default
genesis::utils::MatrixSimpleReader::parse_value_functor
MatrixSimpleReader & parse_value_functor(std::function< T(utils::InputStream &)> functor)
Provide a function to parse an InputStream and return a value of the Matrix.
Definition: utils/containers/matrix/simple_reader.hpp:132
matrix.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::InputStream::get_line
void get_line(std::string &target)
Read the current line, append it to the target, and move to the beginning of the next line.
Definition: input_stream.cpp:127
genesis::utils::MatrixSimpleReader::skip_first_row
bool skip_first_row() const
Definition: utils/containers/matrix/simple_reader.hpp:103
convert.hpp
char.hpp
genesis::utils::MatrixSimpleReader::operator=
MatrixSimpleReader & operator=(MatrixSimpleReader const &)=default
genesis::utils::MatrixSimpleReader::skip_first_col
MatrixSimpleReader & skip_first_col(bool value)
Definition: utils/containers/matrix/simple_reader.hpp:114
genesis::utils::char_to_hex
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Definition: char.cpp:118
scanner.hpp
genesis::utils::MatrixSimpleReader::convert_value_functor
MatrixSimpleReader & convert_value_functor(std::function< T(std::string const &)> functor)
Definition: utils/containers/matrix/simple_reader.hpp:138