A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
utils/containers/matrix/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
2 #define GENESIS_UTILS_CONTAINERS_MATRIX_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
38 
39 #include <functional>
40 #include <stdexcept>
41 #include <sstream>
42 #include <string>
43 #include <vector>
44 
45 namespace genesis {
46 namespace utils {
47 
48 // =================================================================================================
49 // MatrixReader
50 // =================================================================================================
51 
52 template <typename T>
54 {
55 public:
56 
57  // -------------------------------------------------------------
58  // Constructors and Rule of Five
59  // -------------------------------------------------------------
60 
61  MatrixReader( char separator_char = '\t' )
62  {
63  reader_.separator_chars( std::string( 1, separator_char ));
64  }
65 
66  MatrixReader( CsvReader const& reader )
67  : reader_( reader )
68  {}
69 
70  ~MatrixReader() = default;
71 
72  MatrixReader(MatrixReader const&) = default;
73  MatrixReader(MatrixReader&&) = default;
74 
75  MatrixReader& operator= (MatrixReader const&) = default;
76  MatrixReader& operator= (MatrixReader&&) = default;
77 
78  // -------------------------------------------------------------
79  // Reading
80  // -------------------------------------------------------------
81 
82  Matrix<T> from_stream( std::istream& is ) const
83  {
84  utils::InputStream it( utils::make_unique< utils::StreamInputSource >( is ));
85  return parse_( it );
86  }
87 
88  Matrix<T> from_file ( std::string const& fn ) const
89  {
90  utils::InputStream it( utils::make_unique< utils::FileInputSource >( fn ));
91  return parse_( it );
92  }
93 
94  Matrix<T> from_string( std::string const& fs ) const
95  {
96  utils::InputStream it( utils::make_unique< utils::StringInputSource >( fs ));
97  return parse_( it );
98  }
99 
100  // -------------------------------------------------------------
101  // Properties
102  // -------------------------------------------------------------
103 
104  bool skip_first_col() const
105  {
106  return skip_first_col_;
107  }
108 
109  bool skip_first_row() const
110  {
111  return skip_first_row_;
112  }
113 
115  {
116  skip_first_col_ = value;
117  return *this;
118  }
119 
121  {
122  skip_first_row_ = value;
123  return *this;
124  }
125 
127  {
128  return reader_;
129  }
130 
131  CsvReader const& csv_reader() const
132  {
133  return reader_;
134  }
135 
136  MatrixReader& parse_value_functor( std::function<T( std::string const& )> functor )
137  {
138  parse_value_ = functor;
139  }
140 
141  // -------------------------------------------------------------
142  // Internal Functions
143  // -------------------------------------------------------------
144 
145 private:
146 
147  Matrix<T> parse_( utils::InputStream& input_stream ) const
148  {
149  // We collect data in a vector first, because resizing a Matrix is hard.
150  std::vector<T> table;
151  size_t cols = 0;
152 
153  // Early stop.
154  if( ! input_stream ) {
155  return {};
156  }
157 
158  // Skip first line if needed.
159  if( skip_first_row_ ) {
160  reader_.parse_line( input_stream );
161  }
162 
163  while( input_stream ) {
164  auto const line = reader_.parse_line( input_stream );
165 
166  // Get the measurements of the interesting part of the line.
167  auto first = 0;
168  auto len = line.size();
169  if( len > 0 && skip_first_col_ ) {
170  first = 1;
171  --len;
172  }
173 
174  // Check that line length is consisent. Cols == 0 means we just started.
175  if( cols == 0 ) {
176 
177  // Matrix with zero length colums is empty, no matter how many rows it has.
178  if( len == 0 ) {
179  return {};
180  // throw std::runtime_error( "Cannot read Matrix with empty lines." );
181  }
182 
183  // Store the col length.
184  cols = len;
185 
186  } else if( cols != len ) {
187  throw std::runtime_error( "Matrix has different line lengths." );
188  }
189 
190  // Parse and transfer the data. User specified parser or default one.
191  if( parse_value_ ) {
192  for( size_t i = first; i < line.size(); ++i ) {
193  table.push_back( parse_value_( line[i] ) );
194  }
195  } else {
196  for( size_t i = first; i < line.size(); ++i ) {
197  table.push_back( parse_value_stringstream_( line[i] ) );
198  }
199  }
200  }
201 
202  // We cannot properly calculate dimensions of an empty matrix. So better return here.
203  if( table.size() == 0 ) {
204  assert( cols == 0 );
205  return {};
206  }
207 
208  // Make sure that the table as a matrix shape.
209  if( table.size() % cols != 0 ) {
210  // I'm pretty sure this should be an assertion. But better check it all the time,
211  // and throw some confusing incomprehensible error message.
212  throw std::runtime_error( "Matrix is not rectangluar." );
213  }
214 
215  // Make a proper Matrix.
216  size_t const rows = table.size() / cols;
217  return Matrix<T>( rows, cols, std::move(table) );
218  }
219 
220  inline T parse_value_stringstream_( std::string const& cell ) const
221  {
222  std::stringstream ss( cell );
223  T value;
224  ss >> value;
225  return value;
226  }
227 
228  // -------------------------------------------------------------
229  // Data Members
230  // -------------------------------------------------------------
231 
232 private:
233 
234  bool skip_first_row_ = false;
235  bool skip_first_col_ = false;
236 
237  CsvReader reader_;
238 
239  std::function<T( std::string const& )> parse_value_;
240 
241 };
242 
243 } // namespace utils
244 } // namespace genesis
245 
246 #endif // include guard
MatrixReader & parse_value_functor(std::function< T(std::string const &)> functor)
MatrixReader & operator=(MatrixReader const &)=default
Matrix< T > from_file(std::string const &fn) const
CsvReader & separator_chars(std::string const &chars)
Set the chars used to separate fields of the CSV data.
std::vector< std::string > parse_line(utils::InputStream &input_stream) const
Parse one line of the CSV data and return it.
Provides some valuable additions to STD.
Matrix< T > from_string(std::string const &fs) const
Read Comma Separated Values (CSV) data and other delimiter-separated formats.
Matrix< T > from_stream(std::istream &is) const
Stream interface for reading data from an InputSource, that keeps track of line and column counters...