A library for working with phylogenetic and population genetic data.
v0.27.0
sync_input_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_SYNC_INPUT_ITERATOR_H_
2 #define GENESIS_POPULATION_FORMATS_SYNC_INPUT_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <string>
38 #include <vector>
39 
40 namespace genesis {
41 namespace population {
42 
43 // =================================================================================================
44 // Sync Input Iterator
45 // =================================================================================================
46 
75 {
76 public:
77 
78  // -------------------------------------------------------------------------
79  // Member Types
80  // -------------------------------------------------------------------------
81 
84  using pointer = value_type*; // Pointer to the element type (value_type is declared on a line not shown in this listing).
86  using const_reference = value_type const&; // Read-only reference to the element type.
87  using difference_type = std::ptrdiff_t; // Signed type for iterator distances.
88  using iterator_category = std::input_iterator_tag; // This class declares itself as an input iterator.
89 
90  // -------------------------------------------------------------------------
91  // Constructors and Rule of Five
92  // -------------------------------------------------------------------------
93 
97  SyncInputIterator() = default;
98 
104  std::shared_ptr< utils::BaseInputSource > source,
105  SyncReader const& reader = {}
106  )
107  : input_stream_( std::make_shared<utils::InputStream>( source ))
108  , reader_( reader )
109  {
110  // Read the first record of the file.
111  increment();
112  }
113 
120  std::shared_ptr< utils::BaseInputSource > source,
121  std::vector<bool> const& sample_filter,
122  SyncReader const& reader = {}
123  )
124  : input_stream_( std::make_shared<utils::InputStream>( source ))
125  , reader_( reader )
126  , sample_filter_( sample_filter )
127  , use_sample_filter_( true )
128  {
129  // Read the first record of the file.
130  increment();
131  }
132 
133  ~SyncInputIterator() = default; // Compiler-generated destructor.
134 
135  SyncInputIterator( self_type const& ) = default; // Member-wise copy; the copy holds the same input_stream_ shared pointer as the original.
136  SyncInputIterator( self_type&& ) = default; // Compiler-generated move construction.
137 
138  self_type& operator= ( self_type const& ) = default; // Member-wise copy assignment; input_stream_ is shared afterwards, as for the copy constructor.
139  self_type& operator= ( self_type&& ) = default; // Compiler-generated move assignment.
140 
141  // -------------------------------------------------------------------------
142  // Comparators
143  // -------------------------------------------------------------------------
144 
148  explicit operator bool() const
149  {
150  return good_;
151  }
152 
153  bool good() const
154  {
155  return good_;
156  }
157 
158  // -------------------------------------------------------------------------
159  // Accessors
160  // -------------------------------------------------------------------------
161 
162  Variant const& variant() const
163  {
164  return variant_;
165  }
166 
168  {
169  return variant_;
170  }
171 
172  Variant const* operator ->() const
173  {
174  return &variant_;
175  }
176 
178  {
179  return &variant_;
180  }
181 
182  Variant const& operator*() const
183  {
184  return variant_;
185  }
186 
188  {
189  return variant_;
190  }
191 
192  // -------------------------------------------------------------------------
193  // Iteration
194  // -------------------------------------------------------------------------
195 
197  {
198  increment();
199  return *this;
200  }
201 
203  {
204  increment();
205  return *this;
206  }
207 
208  void increment()
209  {
210  // Read into temp object, so that we have the previous one still available.
211  // Same logic as SimplePileupInputIterator<Variant>::increment_(), see there for details.
212  Variant tmp;
213  tmp.samples.resize( variant_.samples.size() );
214  if( use_sample_filter_ ) {
215  good_ = reader_.parse_line( *input_stream_, tmp, sample_filter_ );
216  } else {
217  good_ = reader_.parse_line( *input_stream_, tmp );
218  }
219 
220  // Make sure that the input is sorted.
221  if( good_ &&
222  (
223  ( tmp.chromosome < variant_.chromosome ) ||
224  ( tmp.chromosome == variant_.chromosome && tmp.position <= variant_.position )
225  )
226  ) {
227  throw std::runtime_error(
228  "Malformed pileup " + input_stream_->source_name() + " at " + input_stream_->at() +
229  ": unordered chromosomes and positions"
230  );
231  }
232  variant_ = std::move( tmp );
233  }
234 
235  bool operator==( self_type const& it ) const
236  {
237  return good_ == it.good_;
238  }
239 
240  bool operator!=( self_type const& it ) const
241  {
242  return !(*this == it);
243  }
244 
245  // -------------------------------------------------------------------------
246  // Data Members
247  // -------------------------------------------------------------------------
248 
249 private:
250 
251  // Basic iterator setup and input.
252  bool good_ = false; // Result of the most recent parse_line() call in increment(); false before any successful read.
253  std::shared_ptr<utils::InputStream> input_stream_; // Created by the constructors that take a source; stays null for a default-constructed iterator.
254 
255  // Reading into variants
256  Variant variant_; // The most recently read record, as exposed by variant(), operator* and operator->.
257  SyncReader reader_; // Copy of the reader given to the constructor; used for parse_line().
258 
259  // Sample filtering
260  std::vector<bool> sample_filter_; // Passed on to parse_line() when use_sample_filter_ is set.
261  bool use_sample_filter_ = false; // Set to true only by the constructor that takes a sample filter.
262 };
263 
264 } // namespace population
265 } // namespace genesis
266 
267 #endif // include guard
genesis::population::SyncInputIterator::SyncInputIterator
SyncInputIterator(std::shared_ptr< utils::BaseInputSource > source, SyncReader const &reader={})
Create an instance that reads from an input source, and optionally take a SyncReader with settings to...
Definition: sync_input_iterator.hpp:103
genesis::population::SyncInputIterator::SyncInputIterator
SyncInputIterator(std::shared_ptr< utils::BaseInputSource > source, std::vector< bool > const &sample_filter, SyncReader const &reader={})
Create an instance that reads from an input source, using only the samples at the indices where the s...
Definition: sync_input_iterator.hpp:119
genesis::population::SyncInputIterator::operator++
self_type & operator++()
Definition: sync_input_iterator.hpp:196
genesis::population::Variant::position
size_t position
Definition: variant.hpp:65
genesis::population::SyncInputIterator::difference_type
std::ptrdiff_t difference_type
Definition: sync_input_iterator.hpp:87
sync_reader.hpp
genesis::population::SyncInputIterator::SyncInputIterator
SyncInputIterator()=default
Create a default instance, with no input.
genesis::population::SyncInputIterator::operator->
Variant const * operator->() const
Definition: sync_input_iterator.hpp:172
genesis::population::SyncReader::parse_line
bool parse_line(utils::InputStream &input_stream, Variant &sample_set) const
Definition: sync_reader.cpp:106
genesis::population::SyncInputIterator::variant
Variant & variant()
Definition: sync_input_iterator.hpp:167
genesis::population::SyncInputIterator::operator=
self_type & operator=(self_type const &)=default
genesis::population::SyncInputIterator::increment
void increment()
Definition: sync_input_iterator.hpp:208
genesis::population::SyncInputIterator::self_type
SyncInputIterator self_type
Definition: sync_input_iterator.hpp:82
input_source.hpp
genesis::population::SyncInputIterator::operator==
bool operator==(self_type const &it) const
Definition: sync_input_iterator.hpp:235
genesis::population::SyncInputIterator::operator*
Variant const & operator*() const
Definition: sync_input_iterator.hpp:182
genesis::population::Variant::samples
std::vector< BaseCounts > samples
Definition: variant.hpp:69
genesis::population::Variant
A single variant at a position in a chromosome, along with BaseCounts for a set of samples.
Definition: variant.hpp:62
genesis::population::SyncInputIterator::const_reference
value_type const & const_reference
Definition: sync_input_iterator.hpp:86
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SyncInputIterator::~SyncInputIterator
~SyncInputIterator()=default
genesis::population::SyncInputIterator::variant
Variant const & variant() const
Definition: sync_input_iterator.hpp:162
genesis::population::SyncInputIterator
Iterate an input source and parse it as a sync file.
Definition: sync_input_iterator.hpp:74
genesis::population::SyncInputIterator::good
bool good() const
Definition: sync_input_iterator.hpp:153
genesis::population::SyncInputIterator::operator!=
bool operator!=(self_type const &it) const
Definition: sync_input_iterator.hpp:240
genesis::population::SyncReader
Reader for PoPoolation2's "synchronized" files.
Definition: sync_reader.hpp:62
genesis::population::SyncInputIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: sync_input_iterator.hpp:88
genesis::population::SyncInputIterator::operator*
Variant & operator*()
Definition: sync_input_iterator.hpp:187
genesis::population::Variant::chromosome
std::string chromosome
Definition: variant.hpp:64