A library for working with phylogenetic data.
v0.25.0
simple_pileup_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_SIMPLE_PILEUP_READER_H_
2 #define GENESIS_POPULATION_FORMATS_SIMPLE_PILEUP_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
37 
38 #include <string>
39 #include <vector>
40 
41 namespace genesis {
42 namespace population {
43 
44 // =================================================================================================
45 // Simple (m)pileup Reader
46 // =================================================================================================
47 
69 {
70 public:
71 
72  // -------------------------------------------------------------------------
73  // Typedefs and Enums
74  // -------------------------------------------------------------------------
75 
92  struct Sample // One sample in a pileup line/record.
93  {
106  size_t read_coverage = 0; // Total count of reads covering this position.
107 
115  std::string read_bases; // All bases (except for indels) of the reads that cover the given position.
116 
124  std::vector<unsigned char> phred_scores; // Phred-scaled scores of the bases as given in read_bases.
125 
131  char ancestral_base = '\0'; // Base of the ancestral allele; stays '\0' unless it is set.
132  };
133 
141  struct Record // Single line/record from a pileup file.
142  {
143  std::string chromosome; // Chromosome field of the record.
144  size_t position = 0; // Position field of the record; zero-initialized so that a default-constructed Record never holds an indeterminate value (matches the defaulted scalars in Sample).
146  std::vector<Sample> samples; // Samples of this record. NOTE(review): the reference_base member (listing line 145) is not shown in this excerpt; consider giving it a default initializer as well.
147  };
148 
150 
151  // -------------------------------------------------------------------------
152  // Constructors and Rule of Five
153  // -------------------------------------------------------------------------
154 
155  SimplePileupReader() = default; // Default constructor; the settings keep their in-class default values.
156  ~SimplePileupReader() = default; // Compiler-generated destructor.
157 
158  SimplePileupReader( self_type const& ) = default; // Compiler-generated copy constructor.
159  SimplePileupReader( self_type&& ) = default; // Compiler-generated move constructor.
160 
161  self_type& operator= ( self_type const& ) = default; // Compiler-generated copy assignment.
162  self_type& operator= ( self_type&& ) = default; // Compiler-generated move assignment.
163 
164  // ---------------------------------------------------------------------
165  // Reading
166  // ---------------------------------------------------------------------
167 
171  std::vector<Record> read( std::shared_ptr< utils::BaseInputSource > source ) const; // Read an (m)pileup file line by line from the given source and return the Records.
172 
176  std::vector<Record> read( // Overload of read() that additionally takes a list of sample indices.
177  std::shared_ptr< utils::BaseInputSource > source, // Input source to read from.
178  std::vector<size_t> const& sample_indices // NOTE(review): presumably the indices of the samples to keep — confirm against the .cpp.
179  ) const;
180 
189  std::vector<Record> read( // Overload of read() that additionally takes a boolean sample filter.
190  std::shared_ptr< utils::BaseInputSource > source, // Input source to read from.
191  std::vector<bool> const& sample_filter // NOTE(review): presumably one flag per sample, e.g. as created by make_sample_filter() — confirm against the .cpp.
192  ) const;
193 
194  // -------------------------------------------------------------------------
195  // Parsing
196  // -------------------------------------------------------------------------
197 
201  bool parse_line( // Read an (m)pileup line.
202  utils::InputStream& input_stream, // Stream to read the line from.
203  Record& record // Record passed by non-const reference; presumably filled with the parsed line — confirm against the .cpp.
204  ) const; // NOTE(review): the meaning of the bool result is not visible in this header; confirm against the .cpp.
205 
213  bool parse_line( // Overload of parse_line() that additionally takes a boolean sample filter.
214  utils::InputStream& input_stream, // Stream to read the line from.
215  Record& record, // Record passed by non-const reference; presumably filled with the parsed line — confirm against the .cpp.
216  std::vector<bool> const& sample_filter // NOTE(review): presumably one flag per sample, e.g. as created by make_sample_filter() — confirm against the .cpp.
217  ) const;
218 
219  // -------------------------------------------------------------------------
220  // Settings
221  // -------------------------------------------------------------------------
222 
223  bool with_quality_string() const // Get whether a phred-scaled, ASCII-encoded quality code string is expected per sample.
224  {
225  return with_quality_string_;
226  }
227 
244  {
245  with_quality_string_ = value;
246  return *this;
247  }
248 
250  {
251  return quality_encoding_;
252  }
253 
262  {
263  quality_encoding_ = value;
264  return *this;
265  }
266 
267  bool with_ancestral_base() const // Get whether the base of the ancestral allele is expected as the last part of each sample.
268  {
269  return with_ancestral_base_;
270  }
271 
288  {
289  with_ancestral_base_ = value;
290  return *this;
291  }
292 
293  // -------------------------------------------------------------------------
294  // Helper Functions
295  // -------------------------------------------------------------------------
296 
300  static std::vector<bool> make_sample_filter( std::vector<size_t> const& indices ); // Helper function to create a sample filter from a list of sample indices.
301 
302  // -------------------------------------------------------------------------
303  // Internal Members
304  // -------------------------------------------------------------------------
305 
306 private:
307 
308  bool parse_line_( // Internal line parser. NOTE(review): presumably the shared implementation behind both parse_line() overloads — confirm against the .cpp.
309  utils::InputStream& input_stream, // Stream to read the line from.
310  Record& record, // Record passed by non-const reference.
311  std::vector<bool> const& sample_filter, // Boolean sample filter.
312  bool use_sample_filter // NOTE(review): presumably selects whether sample_filter is applied — confirm against the .cpp.
313  ) const;
314 
315  void process_sample_( // Internal helper working on one sample. NOTE(review): presumably parses one sample from the stream into `sample` — confirm against the .cpp.
316  utils::InputStream& input_stream, // Stream to read from.
317  Record& record, // Record that the sample belongs to, passed by non-const reference.
318  Sample& sample // Sample passed by non-const reference.
319  ) const;
320 
321  void skip_sample_( // Internal helper. NOTE(review): presumably advances the stream past one sample without storing it — confirm against the .cpp.
322  utils::InputStream& input_stream // Stream to advance.
323  ) const;
324 
325  void next_field_( // Internal helper. NOTE(review): presumably advances the stream to the next field of the line — confirm against the .cpp.
326  utils::InputStream& input_stream // Stream to advance.
327  ) const;
328 
329  // -------------------------------------------------------------------------
330  // Data Members
331  // -------------------------------------------------------------------------
332 
333 private:
334 
335  // Set whether the file contains the base quality score column, and if so, how it is encoded
336  // (we default to Sanger with offset 33), and if we want to skip low quality bases.
337  bool with_quality_string_ = true; // Defaults to true; read by with_quality_string().
339 
340  // Set whether the last part of the sample line contains the base of the ancestral allele.
341  bool with_ancestral_base_ = false; // Defaults to false; read by with_ancestral_base().
342 };
343 
344 } // namespace population
345 } // namespace genesis
346 
347 #endif // include guard
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:80
genesis::population::SimplePileupReader
Reader for line-by-line assessment of (m)pileup files.
Definition: simple_pileup_reader.hpp:68
genesis::population::SimplePileupReader::Record::position
size_t position
Definition: simple_pileup_reader.hpp:144
genesis::population::SimplePileupReader::read
std::vector< Record > read(std::shared_ptr< utils::BaseInputSource > source) const
Read an (m)pileup file line by line.
Definition: simple_pileup_reader.cpp:51
genesis::sequence::QualityEncoding::kSanger
@ kSanger
genesis::population::SimplePileupReader::Sample::read_coverage
size_t read_coverage
Total count of reads covering this position.
Definition: simple_pileup_reader.hpp:106
genesis::population::SimplePileupReader::quality_encoding
self_type & quality_encoding(sequence::QualityEncoding value)
Set the type of encoding for the quality code string.
Definition: simple_pileup_reader.hpp:261
input_source.hpp
input_stream.hpp
genesis::population::SimplePileupReader::Sample
One sample in a pileup line/record.
Definition: simple_pileup_reader.hpp:92
genesis::population::SimplePileupReader::~SimplePileupReader
~SimplePileupReader()=default
genesis::population::SimplePileupReader::Record
Single line/record from a pileup file.
Definition: simple_pileup_reader.hpp:141
genesis::population::SimplePileupReader::Record::reference_base
char reference_base
Definition: simple_pileup_reader.hpp:145
genesis::sequence::QualityEncoding
QualityEncoding
List of quality encodings for which we support decoding.
Definition: quality.hpp:71
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SimplePileupReader::Sample::read_bases
std::string read_bases
All bases (except for indels) of the reads that cover the given position.
Definition: simple_pileup_reader.hpp:115
genesis::population::SimplePileupReader::parse_line
bool parse_line(utils::InputStream &input_stream, Record &record) const
Read an (m)pileup line.
Definition: simple_pileup_reader.cpp:95
genesis::population::SimplePileupReader::with_quality_string
self_type & with_quality_string(bool value)
Set whether to expect a phred-scaled, ASCII-encoded quality code string per sample.
Definition: simple_pileup_reader.hpp:243
genesis::population::SimplePileupReader::Record::chromosome
std::string chromosome
Definition: simple_pileup_reader.hpp:143
genesis::population::SimplePileupReader::with_ancestral_base
self_type & with_ancestral_base(bool value)
Set whether to expect the base of the ancestral allele as the last part of each sample in a record li...
Definition: simple_pileup_reader.hpp:287
genesis::population::SimplePileupReader::make_sample_filter
static std::vector< bool > make_sample_filter(std::vector< size_t > const &indices)
Helper function to create a sample filter from a list of sample indices.
Definition: simple_pileup_reader.cpp:114
genesis::population::SimplePileupReader::Sample::ancestral_base
char ancestral_base
Base of the ancestral allele.
Definition: simple_pileup_reader.hpp:131
genesis::population::SimplePileupReader::operator=
self_type & operator=(self_type const &)=default
genesis::population::SimplePileupReader::Record::samples
std::vector< Sample > samples
Definition: simple_pileup_reader.hpp:146
genesis::population::SimplePileupReader::with_ancestral_base
bool with_ancestral_base() const
Definition: simple_pileup_reader.hpp:267
genesis::population::SimplePileupReader::SimplePileupReader
SimplePileupReader()=default
genesis::population::SimplePileupReader::Sample::phred_scores
std::vector< unsigned char > phred_scores
Phred-scaled scores of the bases as given in read_bases.
Definition: simple_pileup_reader.hpp:124
genesis::population::SimplePileupReader::quality_encoding
sequence::QualityEncoding quality_encoding() const
Definition: simple_pileup_reader.hpp:249
quality.hpp
genesis::population::SimplePileupReader::with_quality_string
bool with_quality_string() const
Definition: simple_pileup_reader.hpp:223