A library for working with phylogenetic and population genetic data.
v0.32.0
sync_input_stream.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_SYNC_INPUT_STREAM_H_
2 #define GENESIS_POPULATION_FORMAT_SYNC_INPUT_STREAM_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <string>
38 #include <vector>
39 
40 namespace genesis {
41 namespace population {
42 
43 // =================================================================================================
44 // Sync Input Stream
45 // =================================================================================================
46 
75 {
76 public:
77 
78  // -------------------------------------------------------------------------
79  // Member Types
80  // -------------------------------------------------------------------------
81 
84  using pointer = value_type*; // Pointer to the element type; value_type itself is declared on a line not shown in this listing.
86  using const_reference = value_type const&; // Read-only reference to the element type.
87  using difference_type = std::ptrdiff_t; // Signed type used for iterator distances.
88  using iterator_category = std::input_iterator_tag; // This class is tagged as an input iterator.
89 
90  // -------------------------------------------------------------------------
91  // Constructors and Rule of Five
92  // -------------------------------------------------------------------------
93 
97  SyncInputStream() = default;
98 
103  explicit SyncInputStream(
104  std::shared_ptr< utils::BaseInputSource > source,
105  SyncReader const& reader = {}
106  )
107  : input_stream_( std::make_shared<utils::InputStream>( source ))
108  , reader_( reader )
109  {
110  // Read the header line, if present. If not, this does nothing.
111  // Then, we use the number of sample names that we found to resize the variant that
112  // we read into. If there was no header, this will not do anything, so that the sync reader
113  // will resize by using the size of the first line instead.
114  // We init the size here with the header, which could also be 0, which would be okay.
115  sample_names_ = reader_.read_header( *input_stream_ );
116  sample_size_ = sample_names_.size();
117 
118  // Read the first record of the file.
119  increment();
120  }
121 
128  std::shared_ptr< utils::BaseInputSource > source,
129  std::vector<bool> const& sample_filter,
130  SyncReader const& reader = {}
131  )
132  : input_stream_( std::make_shared<utils::InputStream>( source ))
133  , reader_( reader )
134  , sample_filter_( sample_filter )
135  , use_sample_filter_( true )
136  {
137  // Same as above.
138  sample_names_ = reader_.read_header( *input_stream_, sample_filter_ );
139  sample_size_ = sample_names_.size();
140  increment();
141  }
142 
143  ~SyncInputStream() = default; // Compiler-generated destructor.
144 
145  SyncInputStream( self_type const& ) = default; // Member-wise copy; input_stream_ is a shared_ptr, so the copy refers to the same stream.
146  SyncInputStream( self_type&& ) = default; // Member-wise move.
147 
148  self_type& operator= ( self_type const& ) = default; // Member-wise copy assignment; shares the stream like the copy constructor.
149  self_type& operator= ( self_type&& ) = default; // Member-wise move assignment.
150 
151  // -------------------------------------------------------------------------
152  // Comparators
153  // -------------------------------------------------------------------------
154 
158  explicit operator bool() const
159  {
160  return good_;
161  }
162 
163  bool good() const
164  {
165  return good_;
166  }
167 
168  // -------------------------------------------------------------------------
169  // Accessors
170  // -------------------------------------------------------------------------
171 
172  std::vector<std::string> const& get_sample_names() const
173  {
174  return sample_names_;
175  }
176 
177  Variant const& variant() const
178  {
179  return variant_;
180  }
181 
183  {
184  return variant_;
185  }
186 
187  Variant const* operator ->() const
188  {
189  return &variant_;
190  }
191 
193  {
194  return &variant_;
195  }
196 
197  Variant const& operator*() const
198  {
199  return variant_;
200  }
201 
203  {
204  return variant_;
205  }
206 
207  // -------------------------------------------------------------------------
208  // Iteration
209  // -------------------------------------------------------------------------
210 
212  {
213  increment();
214  return *this;
215  }
216 
218  {
219  increment();
220  return *this;
221  }
222 
223  void increment()
224  {
225  // We don't do any order checks here (for example, on the order of the input),
226  // and leave that to downstream checkers that might want to add this on top.
227  // NB: We used to have a check here, by reading into a temp instance first,
228  // and checking if the chromosome and position order was working.
229  // But this did not work as intended when this iterator/reader here was used with
230  // make_variant_input_stream_from_sync_file(), as in that case, the chromosome name
231  // is moved out of the Variant, so that we would always test against an empty moved-from
232  // chromosome name, which would just never fail, and so completely miss its purpose.
233  // Now, we don't do that check any more, but if we bring back that step from old commits,
234  // be aware that it did not work as intended.
235 
236  // We set the size here, so that the reader checks the correct sample size every time.
237  // We need to reset, as the Variant might be moved-from when we get here,
238  // as for example the make_variant_input_stream_from_sync_file() iterator does that.
239  // We then also set that size again after we are done - as a means of initializing
240  // it in the first iteration, in case that there was no header that already did that.
241  variant_.samples.resize( sample_size_ );
242  if( use_sample_filter_ ) {
243  good_ = reader_.parse_line( *input_stream_, variant_, sample_filter_ );
244  } else {
245  good_ = reader_.parse_line( *input_stream_, variant_ );
246  }
247  sample_size_ = variant_.samples.size();
248  }
249 
250  bool operator==( self_type const& it ) const
251  {
252  return good_ == it.good_;
253  }
254 
255  bool operator!=( self_type const& it ) const
256  {
257  return !(*this == it);
258  }
259 
260  // -------------------------------------------------------------------------
261  // Data Members
262  // -------------------------------------------------------------------------
263 
264 private:
265 
266  // Basic iterator setup and input.
267  bool good_ = false; // Result of the most recent parse_line() call; false until a line has been read.
268  std::shared_ptr<utils::InputStream> input_stream_; // Stream that is read from; held via shared_ptr, so copies of this instance share it.
269 
270  // Reading into variants
271  SyncReader reader_; // Reader used for the header line and for each record.
272  std::vector<std::string> sample_names_; // Result of read_header(), set in the constructors.
273  size_t sample_size_ = 0; // Sample count that variant_ is resized to before each read; set from the header, then from each parsed line.
274  Variant variant_; // Record filled by the most recent increment().
275 
276  // Sample filtering
277  std::vector<bool> sample_filter_; // Filter forwarded to the reader; only used if use_sample_filter_ is set.
278  bool use_sample_filter_ = false; // Set to true by the constructor that takes a sample filter.
279 };
280 
281 } // namespace population
282 } // namespace genesis
283 
284 #endif // include guard
genesis::population::SyncInputStream::self_type
SyncInputStream self_type
Definition: sync_input_stream.hpp:82
genesis::population::SyncInputStream::iterator_category
std::input_iterator_tag iterator_category
Definition: sync_input_stream.hpp:88
genesis::population::SyncInputStream::operator=
self_type & operator=(self_type const &)=default
genesis::population::SyncInputStream::operator==
bool operator==(self_type const &it) const
Definition: sync_input_stream.hpp:250
genesis::population::SyncInputStream::const_reference
value_type const & const_reference
Definition: sync_input_stream.hpp:86
sync_reader.hpp
genesis::population::SyncInputStream::operator->
Variant const * operator->() const
Definition: sync_input_stream.hpp:187
genesis::population::SyncInputStream::SyncInputStream
SyncInputStream()=default
Create a default instance, with no input.
genesis::population::SyncReader::parse_line
bool parse_line(utils::InputStream &input_stream, Variant &sample_set) const
Read a single line into the provided Variant.
Definition: sync_reader.cpp:160
genesis::population::SyncInputStream::operator*
Variant & operator*()
Definition: sync_input_stream.hpp:202
input_source.hpp
genesis::population::SyncInputStream::SyncInputStream
SyncInputStream(std::shared_ptr< utils::BaseInputSource > source, std::vector< bool > const &sample_filter, SyncReader const &reader={})
Create an instance that reads from an input source, using only the samples at the indices where the sample_filter is true.
Definition: sync_input_stream.hpp:127
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SyncInputStream::good
bool good() const
Definition: sync_input_stream.hpp:163
genesis::population::SyncInputStream::operator*
Variant const & operator*() const
Definition: sync_input_stream.hpp:197
genesis::population::SyncInputStream::difference_type
std::ptrdiff_t difference_type
Definition: sync_input_stream.hpp:87
genesis::population::SyncInputStream::variant
Variant & variant()
Definition: sync_input_stream.hpp:182
genesis::population::SyncReader::read_header
std::vector< std::string > read_header(utils::InputStream &input_stream) const
Read the header line, if there is one. Do nothing if there is not.
Definition: sync_reader.cpp:56
genesis::population::SyncInputStream
Iterate an input source and parse it as a sync file.
Definition: sync_input_stream.hpp:74
genesis::population::SyncInputStream::get_sample_names
std::vector< std::string > const & get_sample_names() const
Definition: sync_input_stream.hpp:172
genesis::population::SyncInputStream::variant
Variant const & variant() const
Definition: sync_input_stream.hpp:177
genesis::population::SyncInputStream::SyncInputStream
SyncInputStream(std::shared_ptr< utils::BaseInputSource > source, SyncReader const &reader={})
Create an instance that reads from an input source, and optionally take a SyncReader with settings to be used.
Definition: sync_input_stream.hpp:103
genesis::population::Variant::samples
std::vector< SampleCounts > samples
Definition: variant.hpp:82
genesis::population::SyncInputStream::operator!=
bool operator!=(self_type const &it) const
Definition: sync_input_stream.hpp:255
genesis::population::SyncInputStream::operator++
self_type & operator++()
Definition: sync_input_stream.hpp:211
genesis::population::SyncReader
Reader for PoPoolation2's "synchronized" files.
Definition: sync_reader.hpp:92
genesis::population::SyncInputStream::~SyncInputStream
~SyncInputStream()=default
genesis::population::SyncInputStream::increment
void increment()
Definition: sync_input_stream.hpp:223