A library for working with phylogenetic and population genetic data.
v0.32.0
utils/containers/matrix/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
2 #define GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2023 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
41 
42 #include <cstdlib>
43 #include <functional>
44 #include <sstream>
45 #include <stdexcept>
46 #include <string>
47 #include <vector>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // MatrixReader
54 // =================================================================================================
55 
56 template <typename T>
58 {
59 public:
60 
61  // -------------------------------------------------------------
62  // Constructors and Rule of Five
63  // -------------------------------------------------------------
64 
65  explicit MatrixReader( std::string const& separator = "\t" )
66  {
67  reader_.separator_chars( separator );
68  }
69 
70  explicit MatrixReader( CsvReader const& reader )
71  : reader_( reader )
72  {}
73 
74  ~MatrixReader() = default;
75 
76  MatrixReader(MatrixReader const&) = default;
77  MatrixReader(MatrixReader&&) = default;
78 
79  MatrixReader& operator= (MatrixReader const&) = default;
80  MatrixReader& operator= (MatrixReader&&) = default;
81 
82  // -------------------------------------------------------------
83  // Reading
84  // -------------------------------------------------------------
85 
86  Matrix<T> read( std::shared_ptr<BaseInputSource> source ) const
87  {
88  utils::InputStream is( source );
89  return parse_( is );
90  }
91 
92  // -------------------------------------------------------------
93  // Properties
94  // -------------------------------------------------------------
95 
96  bool skip_first_col() const
97  {
98  return skip_first_col_;
99  }
100 
101  bool skip_first_row() const
102  {
103  return skip_first_row_;
104  }
105 
107  {
108  skip_first_col_ = value;
109  return *this;
110  }
111 
113  {
114  skip_first_row_ = value;
115  return *this;
116  }
117 
119  {
120  return reader_;
121  }
122 
123  CsvReader const& csv_reader() const
124  {
125  return reader_;
126  }
127 
128  MatrixReader& convert_value_functor( std::function<T( std::string const& )> functor )
129  {
130  convert_value_ = functor;
131  return *this;
132  }
133 
134  // -------------------------------------------------------------
135  // Internal Functions
136  // -------------------------------------------------------------
137 
138 private:
139 
140  Matrix<T> parse_( utils::InputStream& input_stream ) const
141  {
142  // We collect data in a vector first, because resizing a Matrix is hard.
143  std::vector<T> table;
144  size_t cols = 0;
145 
146  // Early stop.
147  if( ! input_stream ) {
148  return {};
149  }
150 
151  // Skip first line if needed.
152  if( skip_first_row_ ) {
153  reader_.parse_line( input_stream );
154  }
155 
156  while( input_stream ) {
157  auto const line = reader_.parse_line( input_stream );
158 
159  // Get the measurements of the interesting part of the line.
160  auto first = 0;
161  auto len = line.size();
162  if( len > 0 && skip_first_col_ ) {
163  first = 1;
164  --len;
165  }
166 
167  // Check that line length is consisent. Cols == 0 means we just started.
168  if( cols == 0 ) {
169 
170  // Matrix with zero length colums is empty, no matter how many rows it has.
171  if( len == 0 ) {
172  return {};
173  // throw std::runtime_error( "Cannot read Matrix with empty lines." );
174  }
175 
176  // Store the col length.
177  cols = len;
178 
179  } else if( cols != len ) {
180  throw std::runtime_error( "Matrix has different line lengths." );
181  }
182 
183  // Parse and transfer the data. User specified parser or default one.
184  if( convert_value_ ) {
185  for( size_t i = first; i < line.size(); ++i ) {
186  table.push_back( convert_value_( line[i] ) );
187  }
188  } else {
189  for( size_t i = first; i < line.size(); ++i ) {
190  try {
191  table.push_back( convert_from_string<T>( line[i] ));
192  } catch(...) {
193  throw std::runtime_error(
194  "In " + input_stream.source_name() + " line " +
195  std::to_string( input_stream.line() - 1 ) + ": "
196  "Cannot parse value \"" + line[i] + "\" into Matrix. "
197  );
198  }
199  }
200  }
201  }
202 
203  // We cannot properly calculate dimensions of an empty matrix. So better return here.
204  if( table.size() == 0 ) {
205  assert( cols == 0 );
206  return {};
207  }
208 
209  // Make sure that the table as a matrix shape.
210  if( table.size() % cols != 0 ) {
211  // I'm pretty sure this should be an assertion. But better check it all the time,
212  // and throw some confusing incomprehensible error message.
213  throw std::runtime_error( "Matrix is not rectangluar." );
214  }
215 
216  // Make a proper Matrix.
217  size_t const rows = table.size() / cols;
218  return Matrix<T>( rows, cols, std::move(table) );
219  }
220 
221  // -------------------------------------------------------------
222  // Data Members
223  // -------------------------------------------------------------
224 
225 private:
226 
227  bool skip_first_row_ = false;
228  bool skip_first_col_ = false;
229 
230  CsvReader reader_;
231 
232  std::function<T( std::string const& )> convert_value_;
233 
234 };
235 
236 } // namespace utils
237 } // namespace genesis
238 
239 #endif // include guard
genesis::utils::MatrixReader::skip_first_col
bool skip_first_col() const
Definition: utils/containers/matrix/reader.hpp:96
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::utils::MatrixReader::skip_first_row
bool skip_first_row() const
Definition: utils/containers/matrix/reader.hpp:101
genesis::utils::CsvReader::parse_line
std::vector< std::string > parse_line(utils::InputStream &input_stream) const
Parse one line of the CSV data and return it.
Definition: utils/formats/csv/reader.cpp:160
genesis::utils::CsvReader::separator_chars
CsvReader & separator_chars(std::string const &chars)
Set the chars used to separate fields of the CSV data.
Definition: utils/formats/csv/reader.hpp:259
genesis::utils::InputStream::source_name
std::string source_name() const
Get the input source name where this stream reads from.
Definition: input_stream.hpp:478
genesis::utils::MatrixReader::convert_value_functor
MatrixReader & convert_value_functor(std::function< T(std::string const &)> functor)
Definition: utils/containers/matrix/reader.hpp:128
genesis::utils::MatrixReader::csv_reader
CsvReader & csv_reader()
Definition: utils/containers/matrix/reader.hpp:118
genesis::utils::MatrixReader::MatrixReader
MatrixReader(CsvReader const &reader)
Definition: utils/containers/matrix/reader.hpp:70
genesis::utils::MatrixReader::MatrixReader
MatrixReader(std::string const &separator="\t")
Definition: utils/containers/matrix/reader.hpp:65
std.hpp
Provides some valuable additions to STD.
input_source.hpp
genesis::utils::MatrixReader::csv_reader
CsvReader const & csv_reader() const
Definition: utils/containers/matrix/reader.hpp:123
reader.hpp
genesis::utils::MatrixReader::~MatrixReader
~MatrixReader()=default
genesis::utils::Matrix
Definition: placement/function/emd.hpp:53
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
string.hpp
Provides some commonly used string utility functions.
input_stream.hpp
genesis::utils::MatrixReader
Definition: utils/containers/matrix/reader.hpp:57
matrix.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
convert.hpp
genesis::utils::MatrixReader::read
Matrix< T > read(std::shared_ptr< BaseInputSource > source) const
Definition: utils/containers/matrix/reader.hpp:86
genesis::utils::InputStream::line
size_t line() const
Return the current line of the input stream.
Definition: input_stream.hpp:417
genesis::utils::MatrixReader::operator=
MatrixReader & operator=(MatrixReader const &)=default
genesis::utils::MatrixReader::skip_first_row
MatrixReader & skip_first_row(bool value)
Definition: utils/containers/matrix/reader.hpp:112
genesis::utils::CsvReader
Read Comma/Character Separated Values (CSV) data and other delimiter-separated formats.
Definition: utils/formats/csv/reader.hpp:70
genesis::utils::MatrixReader::skip_first_col
MatrixReader & skip_first_col(bool value)
Definition: utils/containers/matrix/reader.hpp:106