A library for working with phylogenetic and population genetic data.
v0.32.0
sync_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_SYNC_READER_H_
2 #define GENESIS_POPULATION_FORMAT_SYNC_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
37 
38 #include <string>
39 #include <vector>
40 
41 namespace genesis {
42 namespace population {
43 
44 // =================================================================================================
45 // PoPoolation2 Synchronized File Reader
46 // =================================================================================================
47 
93 {
94 public:
95 
96  // -------------------------------------------------------------------------
97  // Constructors and Rule of Five
98  // -------------------------------------------------------------------------
99 
// Reader for PoPoolation2's "synchronized" files.
// All special member functions (default constructor, destructor, copy/move construction
// and copy/move assignment) are explicitly defaulted, i.e., compiler-generated.
100  SyncReader() = default;
101  ~SyncReader() = default;
102 
103  SyncReader( SyncReader const& ) = default;
104  SyncReader( SyncReader&& ) = default;
105 
106  SyncReader& operator= ( SyncReader const& ) = default;
107  SyncReader& operator= ( SyncReader&& ) = default;
108 
109  // ---------------------------------------------------------------------
110  // Read Header
111  // ---------------------------------------------------------------------
112 
// Read the header line, if there is one. Do nothing if there is not.
// Returns a list of strings; presumably the sample names given in the header, and
// empty if there is no header -- TODO confirm against sync_reader.cpp.
132  std::vector<std::string> read_header(
133  utils::InputStream& input_stream
134  ) const;
135 
// Overload of read_header() that also takes a per-sample boolean filter.
// NOTE(review): presumably sample_filter[i] decides whether sample i is kept in the
// returned list; the filter semantics are not visible in this header -- confirm
// against sync_reader.cpp.
148  std::vector<std::string> read_header(
149  utils::InputStream& input_stream,
150  std::vector<bool> const& sample_filter
151  ) const;
152 
153  // ---------------------------------------------------------------------
154  // Reading
155  // ---------------------------------------------------------------------
156 
// Read the whole input into a vector of Variants.
// The input is given as a shared pointer to a utils::BaseInputSource.
160  std::vector<Variant> read(
161  std::shared_ptr< utils::BaseInputSource > source
162  ) const;
163 
// Overload of read() that also takes a per-sample boolean filter.
// NOTE(review): presumably only the samples whose filter entry is true end up in the
// resulting Variants -- not visible in this header, confirm against sync_reader.cpp.
172  std::vector<Variant> read(
173  std::shared_ptr< utils::BaseInputSource > source,
174  std::vector<bool> const& sample_filter
175  ) const;
176 
177  // -------------------------------------------------------------------------
178  // Parsing
179  // -------------------------------------------------------------------------
180 
// Read a single line into the provided Variant.
// The Variant is passed by non-const reference and serves as the output of the call.
// NOTE(review): the bool result presumably signals whether a line was read
// (e.g., false at the end of the input) -- confirm against sync_reader.cpp.
186  bool parse_line(
187  utils::InputStream& input_stream,
188  Variant& sample_set
189  ) const;
190 
// Overload of parse_line() that also takes a per-sample boolean filter.
// NOTE(review): presumably samples whose filter entry is false are skipped instead of
// being stored in the Variant -- not visible in this header, confirm against
// sync_reader.cpp.
196  bool parse_line(
197  utils::InputStream& input_stream,
198  Variant& sample_set,
199  std::vector<bool> const& sample_filter
200  ) const;
201 
202  // -------------------------------------------------------------------------
203  // Settings
204  // -------------------------------------------------------------------------
205 
// Get the current guess_alt_base setting, i.e., whether the alternative base of a
// Variant is guessed. The underlying member is initialized to false.
206  bool guess_alt_base() const
207  {
208  return guess_alt_base_;
209  }
210 
// Set to guess the alternative base of the Variant, instead of leaving it at 'N'.
// Returns a reference to this reader, so that setter calls can be chained.
224  SyncReader& guess_alt_base( bool value )
225  {
226  guess_alt_base_ = value;
227  return *this;
228  }
229 
// Get the current allow_missing setting, i.e., whether missing data is allowed.
// The underlying member is initialized to true.
230  bool allow_missing() const
231  {
232  return allow_missing_;
233  }
234 
// Set whether to allow missing data in the format suggested by Kapun et al.
// Returns a reference to this reader, so that setter calls can be chained.
245  SyncReader& allow_missing( bool value )
246  {
247  allow_missing_ = value;
248  return *this;
249  }
250 
251  // -------------------------------------------------------------------------
252  // Internal Members
253  // -------------------------------------------------------------------------
254 
255 private:
256 
// Private line parser that takes the sample filter plus an explicit flag stating
// whether the filter is to be used.
// NOTE(review): presumably both public parse_line() overloads forward to this
// function -- the definitions are not visible here, confirm against sync_reader.cpp.
257  bool parse_line_(
258  utils::InputStream& input_stream,
259  Variant& sample_set,
260  std::vector<bool> const& sample_filter,
261  bool use_sample_filter
262  ) const;
263 
264  // Only use intrinsics version for the compilers that support them!
// The declaration below exists only if one of the GCC/Clang predefined macros
// tested in the guard is defined.
265  #if defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
266 
// Parse the counts of one sample from the stream into the given SampleCounts.
// NOTE(review): presumably a faster, compiler-intrinsics based counterpart of
// parse_sample_simple_() -- confirm against sync_reader.cpp.
267  void parse_sample_gcc_intrinsic_(
268  utils::InputStream& input_stream,
269  SampleCounts& sample
270  ) const;
271 
272  #endif
273 
// Parse the counts of one sample from the stream into the given SampleCounts.
// Unlike parse_sample_gcc_intrinsic_(), this declaration is not behind a compiler guard.
// NOTE(review): presumably the portable fallback implementation -- confirm against
// sync_reader.cpp.
274  void parse_sample_simple_(
275  utils::InputStream& input_stream,
276  SampleCounts& sample
277  ) const;
278 
// Parse the counts of one sample from the stream into the given SampleCounts.
// NOTE(review): presumably dispatches to either the intrinsics-based or the simple
// parsing function -- not visible in this header, confirm against sync_reader.cpp.
279  void parse_sample_(
280  utils::InputStream& input_stream,
281  SampleCounts& sample
282  ) const;
283 
// Takes only the input stream and produces no output value.
// NOTE(review): presumably advances the stream past one sample without storing its
// counts, for samples that are filtered out -- confirm against sync_reader.cpp.
284  void skip_sample_(
285  utils::InputStream& input_stream
286  ) const;
287 
288  // -------------------------------------------------------------------------
289  // Member Variables
290  // -------------------------------------------------------------------------
291 
292 private:
293 
// Whether to guess the alternative base of a Variant instead of leaving it at 'N'.
// Off by default; accessed via guess_alt_base().
294  bool guess_alt_base_ = false;
// Whether to allow missing data in the format suggested by Kapun et al.
// On by default; accessed via allow_missing().
295  bool allow_missing_ = true;
296 
297 };
298 
299 } // namespace population
300 } // namespace genesis
301 
302 #endif // include guard
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::population::SyncReader::allow_missing
bool allow_missing() const
Definition: sync_reader.hpp:230
genesis::population::SyncReader::guess_alt_base
bool guess_alt_base() const
Definition: sync_reader.hpp:206
genesis::population::SampleCounts
One set of nucleotide sample counts, for example for a given sample that represents a pool of sequenc...
Definition: sample_counts.hpp:56
genesis::population::SyncReader::parse_line
bool parse_line(utils::InputStream &input_stream, Variant &sample_set) const
Read a single line into the provided Variant.
Definition: sync_reader.cpp:160
genesis::population::SyncReader::SyncReader
SyncReader()=default
genesis::population::SyncReader::allow_missing
SyncReader & allow_missing(bool value)
Set whether to allow missing data in the format suggested by Kapun et al.
Definition: sync_reader.hpp:245
input_source.hpp
genesis::population::SyncReader::read
std::vector< Variant > read(std::shared_ptr< utils::BaseInputSource > source) const
Read the whole input into a vector of Variants.
Definition: sync_reader.cpp:120
input_stream.hpp
genesis::population::SyncReader::operator=
SyncReader & operator=(SyncReader const &)=default
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SyncReader::read_header
std::vector< std::string > read_header(utils::InputStream &input_stream) const
Read the header line, if there is one. Do nothing if there is not.
Definition: sync_reader.cpp:56
variant.hpp
genesis::population::SyncReader::guess_alt_base
SyncReader & guess_alt_base(bool value)
Set to guess the alternative base of the Variant, instead of leaving it at 'N'.
Definition: sync_reader.hpp:224
genesis::population::SyncReader
Reader for PoPoolation2's "synchronized" files.
Definition: sync_reader.hpp:92
genesis::population::SyncReader::~SyncReader
~SyncReader()=default