A library for working with phylogenetic and population genetic data.
v0.27.0
simple_pileup_input_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_SIMPLE_PILEUP_INPUT_ITERATOR_H_
2 #define GENESIS_POPULATION_FORMATS_SIMPLE_PILEUP_INPUT_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
37 
38 #include <string>
39 
40 namespace genesis {
41 namespace population {
42 
43 // =================================================================================================
44 // Simple Pileup Input Iterator
45 // =================================================================================================
46 
78 template<class T = SimplePileupReader::Record>
80 {
81 public:
82 
83  // -------------------------------------------------------------------------
84  // Member Types
85  // -------------------------------------------------------------------------
86 
88  using value_type = T;
89  using pointer = value_type*;
91  using const_reference = value_type const&;
92  using difference_type = std::ptrdiff_t;
93  using iterator_category = std::input_iterator_tag;
94 
95  // -------------------------------------------------------------------------
96  // Constructors and Rule of Five
97  // -------------------------------------------------------------------------
98 
102  SimplePileupInputIterator() = default;
103 
109  std::shared_ptr< utils::BaseInputSource > source,
110  SimplePileupReader const& reader = {}
111  )
112  : input_stream_( std::make_shared<utils::InputStream>( source ))
113  , reader_( reader )
114  {
115  // Read the first record of the file.
116  increment();
117  }
118 
119  // /* *
120  // * @brief Create an instance that reads from an input source, using only the samples at the
121  // * indices given in the @p sample_indices, and optionally take a SimplePileupReader with
122  // * settings to be used.
123  // */
124  // SimplePileupInputIterator(
125  // std::shared_ptr< utils::BaseInputSource > source,
126  // std::vector<size_t> const& sample_indices,
127  // SimplePileupReader const& reader = {}
128  // )
129  // : input_stream_( std::make_shared<utils::InputStream>( source ))
130  // , reader_( reader )
131  // , use_sample_filter_( true )
132  // {
133  // // Prepare the sample filter from the indices.
134  // sample_filter_ = utils::make_bool_vector_from_indices( sample_indices );
135  //
136  // // Read the first record of the file.
137  // increment();
138  // }
139 
146  std::shared_ptr< utils::BaseInputSource > source,
147  std::vector<bool> const& sample_filter,
148  SimplePileupReader const& reader = {}
149  )
150  : input_stream_( std::make_shared<utils::InputStream>( source ))
151  , reader_( reader )
152  , sample_filter_( sample_filter )
153  , use_sample_filter_( true )
154  {
155  // Read the first record of the file.
156  increment();
157  }
158 
159  ~SimplePileupInputIterator() = default;
160 
161  SimplePileupInputIterator( self_type const& ) = default;
162  SimplePileupInputIterator( self_type&& ) = default;
163 
164  self_type& operator= ( self_type const& ) = default;
165  self_type& operator= ( self_type&& ) = default;
166 
167  // -------------------------------------------------------------------------
168  // Comparators
169  // -------------------------------------------------------------------------
170 
174  explicit operator bool() const
175  {
176  return good_;
177  }
178 
179  bool good() const
180  {
181  return good_;
182  }
183 
184  // -------------------------------------------------------------------------
185  // Accessors
186  // -------------------------------------------------------------------------
187 
188  T const& record() const
189  {
190  return record_;
191  }
192 
193  T& record()
194  {
195  return record_;
196  }
197 
198  T const* operator ->() const
199  {
200  return &record_;
201  }
202 
204  {
205  return &record_;
206  }
207 
208  T const& operator*() const
209  {
210  return record_;
211  }
212 
214  {
215  return record_;
216  }
217 
218  // -------------------------------------------------------------------------
219  // Iteration
220  // -------------------------------------------------------------------------
221 
223  {
224  increment();
225  return *this;
226  }
227 
229  {
230  increment();
231  return *this;
232  }
233 
234  void increment()
235  {
236  increment_();
237  }
238 
239  bool operator==( self_type const& it ) const
240  {
241  return good_ == it.good_;
242  }
243 
244  bool operator!=( self_type const& it ) const
245  {
246  return !(*this == it);
247  }
248 
249  // -------------------------------------------------------------------------
250  // Internal Members
251  // -------------------------------------------------------------------------
252 
253 private:
254 
255  void increment_();
256 
257  // -------------------------------------------------------------------------
258  // Data Members
259  // -------------------------------------------------------------------------
260 
261 private:
262 
263  // Basic iterator setup and input.
264  bool good_ = false;
265  std::shared_ptr<utils::InputStream> input_stream_;
266 
267  // Reading into records
268  T record_;
269  SimplePileupReader reader_;
270 
271  // Sample filtering
272  std::vector<bool> sample_filter_;
273  bool use_sample_filter_ = false;
274 };
275 
276 // -------------------------------------------------------------------------
277 // Explicit Specializations in Namespace Scope
278 // -------------------------------------------------------------------------
279 
280 template<>
281 inline void SimplePileupInputIterator<SimplePileupReader::Record>::increment_()
282 {
283  // Read into temp object, so that we have the previous one still available.
284  SimplePileupReader::Record tmp;
285  if( use_sample_filter_ ) {
286  good_ = reader_.parse_line_record( *input_stream_, tmp, sample_filter_ );
287  } else {
288  good_ = reader_.parse_line_record( *input_stream_, tmp );
289  }
290 
291  // Make sure that the input is sorted.
292  if( good_ &&
293  (
294  ( tmp.chromosome < record_.chromosome ) ||
295  ( tmp.chromosome == record_.chromosome && tmp.position <= record_.position )
296  )
297  ) {
298  throw std::runtime_error(
299  "Malformed pileup " + input_stream_->source_name() + " at " + input_stream_->at() +
300  ": unordered chromosomes and positions"
301  );
302  }
303  record_ = std::move( tmp );
304 }
305 
306 template<>
307 inline void SimplePileupInputIterator<Variant>::increment_()
308 {
309  // Read into temp object, so that we have the previous one still available for the order
310  // comparison downstream. We want to make some form of copy or renewal anyway, as the iterator
311  // might have been used to obtain the Variant by move, in which case we in particular need
312  // to properly reset the BaseCount sample field of the Variant. To do that efficiently,
313  // and also tap into the error check for correct consistent number of samples per line,
314  // we resize to what the current record is. That way, the check in SimplePileupReader will
315  // kick in, while at the same time providing speed improvemetns due to not having to add
316  // BaseCounts one by one when reading.
317  // Initially, the record has sample size 0, which also works, as this is how the reader starts.
318  Variant tmp;
319  tmp.samples.resize( record_.samples.size() );
320  if( use_sample_filter_ ) {
321  good_ = reader_.parse_line_variant( *input_stream_, tmp, sample_filter_ );
322  } else {
323  good_ = reader_.parse_line_variant( *input_stream_, tmp );
324  }
325 
326  // Make sure that the input is sorted.
327  if( good_ &&
328  (
329  ( tmp.chromosome < record_.chromosome ) ||
330  ( tmp.chromosome == record_.chromosome && tmp.position <= record_.position )
331  )
332  ) {
333  throw std::runtime_error(
334  "Malformed pileup " + input_stream_->source_name() + " at " + input_stream_->at() +
335  ": unordered chromosomes and positions"
336  );
337  }
338  record_ = std::move( tmp );
339 }
340 
341 } // namespace population
342 } // namespace genesis
343 
344 #endif // include guard
genesis::population::SimplePileupInputIterator::self_type
SimplePileupInputIterator self_type
Definition: simple_pileup_input_iterator.hpp:87
helper.hpp
genesis::population::SimplePileupReader
Reader for line-by-line assessment of (m)pileup files.
Definition: simple_pileup_reader.hpp:77
genesis::population::SimplePileupInputIterator::operator!=
bool operator!=(self_type const &it) const
Definition: simple_pileup_input_iterator.hpp:244
genesis::population::SimplePileupInputIterator::operator=
self_type & operator=(self_type const &)=default
genesis::population::SimplePileupInputIterator::increment
void increment()
Definition: simple_pileup_input_iterator.hpp:234
genesis::population::SimplePileupInputIterator::operator->
T const * operator->() const
Definition: simple_pileup_input_iterator.hpp:198
genesis::population::SimplePileupInputIterator::record
T & record()
Definition: simple_pileup_input_iterator.hpp:193
genesis::population::SimplePileupInputIterator::value_type
T value_type
Definition: simple_pileup_input_iterator.hpp:88
input_source.hpp
genesis::population::SimplePileupInputIterator::operator*
T & operator*()
Definition: simple_pileup_input_iterator.hpp:213
genesis::population::SimplePileupInputIterator::reference
value_type & reference
Definition: simple_pileup_input_iterator.hpp:90
genesis::population::SimplePileupInputIterator::difference_type
std::ptrdiff_t difference_type
Definition: simple_pileup_input_iterator.hpp:92
genesis::population::SimplePileupInputIterator::record
T const & record() const
Definition: simple_pileup_input_iterator.hpp:188
genesis::population::SimplePileupInputIterator::operator*
T const & operator*() const
Definition: simple_pileup_input_iterator.hpp:208
genesis::population::SimplePileupInputIterator::SimplePileupInputIterator
SimplePileupInputIterator(std::shared_ptr< utils::BaseInputSource > source, SimplePileupReader const &reader={})
Create an instance that reads from an input source, and optionally take a SimplePileupReader with set...
Definition: simple_pileup_input_iterator.hpp:108
genesis::population::SimplePileupInputIterator
Iterate an input source and parse it as a (m)pileup file.
Definition: simple_pileup_input_iterator.hpp:79
simple_pileup_reader.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SimplePileupInputIterator::good
bool good() const
Definition: simple_pileup_input_iterator.hpp:179
genesis::population::SimplePileupInputIterator::~SimplePileupInputIterator
~SimplePileupInputIterator()=default
genesis::population::SimplePileupInputIterator::SimplePileupInputIterator
SimplePileupInputIterator()=default
Create a default instance, with no input.
genesis::population::SimplePileupInputIterator::pointer
value_type * pointer
Definition: simple_pileup_input_iterator.hpp:89
genesis::population::SimplePileupReader::parse_line_record
bool parse_line_record(utils::InputStream &input_stream, Record &record) const
Read an (m)pileup line, as a Record.
Definition: simple_pileup_reader.cpp:189
genesis::population::SimplePileupInputIterator::SimplePileupInputIterator
SimplePileupInputIterator(std::shared_ptr< utils::BaseInputSource > source, std::vector< bool > const &sample_filter, SimplePileupReader const &reader={})
Create an instance that reads from an input source, using only the samples at the indices where the s...
Definition: simple_pileup_input_iterator.hpp:145
genesis::population::SimplePileupInputIterator::operator==
bool operator==(self_type const &it) const
Definition: simple_pileup_input_iterator.hpp:239
genesis::population::SimplePileupInputIterator::const_reference
value_type const & const_reference
Definition: simple_pileup_input_iterator.hpp:91
genesis::population::SimplePileupInputIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: simple_pileup_input_iterator.hpp:93
genesis::population::SimplePileupInputIterator::operator++
self_type & operator++()
Definition: simple_pileup_input_iterator.hpp:222