A library for working with phylogenetic and population genetic data.
v0.27.0
variant_input_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_VARIANT_INPUT_ITERATOR_H_
2 #define GENESIS_POPULATION_FORMATS_VARIANT_INPUT_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
42 
43 #include <cassert>
44 #include <functional>
45 #include <stdexcept>
46 #include <string>
47 #include <vector>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Forward Declarations
54 // =================================================================================================
55 
56 // We cannot include the VariantParallelInputIterator header here, because that header itself
57 // needs VariantInputIterator to be defined first. Splitting this file in two (one part for the
58 // VariantInputIterator definition, one for the functions that create it from sources) would
59 // resolve this; maybe do that in the future. Until then, this forward declaration means that
60 // users have to include the VariantParallelInputIterator header themselves, which they probably do anyway.
61 class VariantParallelInputIterator;
62 
63 // =================================================================================================
64 // Generic Variant Iterator
65 // =================================================================================================
66 
// VariantInputIteratorData: data storage for input-specific information when traversing
// a variant file.
// NOTE(review): the symbol index places this definition at original line 80; the
// `struct VariantInputIteratorData` header line itself is not present in this listing.
81 {
    // Full file path, when reading from a file.
85  std::string file_path;
86 
    // User-readable name of the input source.
92  std::string source_name;
93 
    // Sample names, for example as found in the file header.
101  std::vector<std::string> sample_names;
102 };
103 
125 
126 // =================================================================================================
127 // Input Sources
128 // =================================================================================================
129 
130 // -------------------------------------------------------------------------
131 // SAM/BAM/CRAM
132 // -------------------------------------------------------------------------
133 
134 // Only available if compiled with htslib
135 #ifdef GENESIS_HTSLIB
136 
// make_variant_input_iterator_from_sam_file(): create a VariantInputIterator to iterate the
// contents of a SAM/BAM/CRAM file as Variants.
// NOTE(review): the symbol index gives the signature as
// (std::string const& filename, SamVariantInputIterator const& reader). The name line and the
// `reader` parameter line (presumably original lines 151 and 153) are missing from this listing.
152  std::string const& filename,
154 );
155 
156 #endif // GENESIS_HTSLIB
157 
158 // -------------------------------------------------------------------------
159 // Pileup
160 // -------------------------------------------------------------------------
161 
// make_variant_input_iterator_from_pileup_file(): create a VariantInputIterator to iterate the
// contents of a (m)pileup file as Variants.
// `reader` defaults to a default-constructed SimplePileupReader.
// NOTE(review): the declaration's name line is missing from this listing; the name is taken
// from the symbol index entry with the matching (filename, reader) signature.
168  std::string const& filename,
169  SimplePileupReader const& reader = SimplePileupReader{}
170 );
171 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_pileup_file() that selects samples via `sample_indices`,
// with `inverse_sample_indices` (default false) presumably inverting that selection - confirm
// against the upstream header. `reader` defaults to a default-constructed SimplePileupReader.
186  std::string const& filename,
187  std::vector<size_t> const& sample_indices,
188  bool inverse_sample_indices = false,
189  SimplePileupReader const& reader = SimplePileupReader{}
190 );
191 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_pileup_file() that selects samples via the boolean
// `sample_filter` - confirm semantics (which value keeps a sample) against the upstream header.
// `reader` defaults to a default-constructed SimplePileupReader.
199  std::string const& filename,
200  std::vector<bool> const& sample_filter,
201  SimplePileupReader const& reader = SimplePileupReader{}
202 );
203 
204 // -------------------------------------------------------------------------
205 // Sync
206 // -------------------------------------------------------------------------
207 
// make_variant_input_iterator_from_sync_file(): create a VariantInputIterator to iterate the
// contents of a PoPoolation2 sync file as Variants.
// NOTE(review): the declaration's name line is missing from this listing; the name is taken
// from the symbol index entry with the matching (filename) signature.
213  std::string const& filename
214 );
215 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_sync_file() that selects samples via `sample_indices`,
// with `inverse_sample_indices` (default false) presumably inverting that selection - confirm
// against the upstream header.
229  std::string const& filename,
230  std::vector<size_t> const& sample_indices,
231  bool inverse_sample_indices = false
232 );
233 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_sync_file() that selects samples via the boolean
// `sample_filter` - confirm semantics (which value keeps a sample) against the upstream header.
242  std::string const& filename,
243  std::vector<bool> const& sample_filter
244 );
245 
246 // -------------------------------------------------------------------------
247 // VCF
248 // -------------------------------------------------------------------------
249 
250 // Only available if compiled with htslib
251 #ifdef GENESIS_HTSLIB
252 
// make_variant_input_iterator_from_pool_vcf_file(): create a VariantInputIterator to iterate
// the contents of a VCF file as Variants (the brief in the symbol index is truncated after
// "treating each sample...").
// Both flags default to true. NOTE(review): presumably `only_biallelic` and `only_filter_pass`
// restrict iteration to biallelic records and records passing FILTER - confirm upstream.
// NOTE(review): the declaration's name line is missing from this listing; the name is taken
// from the symbol index entry with the matching signature.
276  std::string const& filename,
277  bool only_biallelic = true,
278  bool only_filter_pass = true
279 );
280 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_pool_vcf_file() (it has no `use_allelic_depth` parameter)
// that selects samples via `sample_names`, with `inverse_sample_names` (default false)
// presumably inverting that selection - confirm against the upstream header.
289  std::string const& filename,
290  std::vector<std::string> const& sample_names,
291  bool inverse_sample_names = false,
292  bool only_biallelic = true,
293  bool only_filter_pass = true
294 );
295 
// make_variant_input_iterator_from_individual_vcf_file(): create a VariantInputIterator to
// iterate the contents of a VCF file as Variants (the brief in the symbol index is truncated
// after "treating each sample...").
// `use_allelic_depth` defaults to false; `only_biallelic` and `only_filter_pass` default to true.
// NOTE(review): the exact effect of each flag is not visible in this listing - confirm upstream.
// NOTE(review): the declaration's name line is missing from this listing; the name is taken
// from the symbol index entry with the matching signature.
311  std::string const& filename,
312  bool use_allelic_depth = false,
313  bool only_biallelic = true,
314  bool only_filter_pass = true
315 );
316 
// NOTE(review): name line missing from this listing. Presumably an overload of
// make_variant_input_iterator_from_individual_vcf_file() (it has a `use_allelic_depth`
// parameter) that selects samples via `sample_names`, with `inverse_sample_names`
// (default false) presumably inverting that selection - confirm against the upstream header.
325  std::string const& filename,
326  std::vector<std::string> const& sample_names,
327  bool inverse_sample_names = false,
328  bool use_allelic_depth = false,
329  bool only_biallelic = true,
330  bool only_filter_pass = true
331 );
332 
333 #endif // GENESIS_HTSLIB
334 
335 // -------------------------------------------------------------------------
336 // Variant Parallel Input Iterator
337 // -------------------------------------------------------------------------
338 
// make_variant_input_iterator_from_variant_parallel_input_iterator(): create a
// VariantInputIterator to iterate multiple input sources at once, using a
// VariantParallelInputIterator (the brief in the symbol index is truncated).
// Defaults: reference base mismatches are not allowed, alternative base mismatches are allowed,
// and `source_sample_separator` is ":".
// NOTE(review): presumably the separator joins source name and sample name - confirm upstream.
// NOTE(review): the declaration's name line is missing from this listing; the name is taken
// from the symbol index entry with the matching signature.
355  VariantParallelInputIterator const& parallel_input,
356  bool allow_ref_base_mismatches = false,
357  bool allow_alt_base_mismatches = true,
358  std::string const& source_sample_separator = ":"
359 );
360 
361 } // namespace population
362 } // namespace genesis
363 
364 #endif // include guard
genesis::population::SamVariantInputIterator
Input iterator for SAM/BAM/CRAM files that produces a Variant per genome position.
Definition: sam_variant_input_iterator.hpp:102
genesis::utils::LambdaIterator
Type erasure for iterators, using std::function to eliminate the underlying input type.
Definition: lambda_iterator.hpp:150
base_counts.hpp
genesis::population::VariantInputIteratorData::file_path
std::string file_path
Full file path, when reading from a file.
Definition: variant_input_iterator.hpp:85
sync_reader.hpp
genesis::population::make_variant_input_iterator_from_sync_file
VariantInputIterator make_variant_input_iterator_from_sync_file(std::string const &filename)
Create a VariantInputIterator to iterate the contents of a PoPoolation2 sync file as Variants.
Definition: variant_input_iterator.cpp:314
simple_pileup_input_iterator.hpp
genesis::population::VariantInputIteratorData::source_name
std::string source_name
User-readable name of the input source.
Definition: variant_input_iterator.hpp:92
sam_variant_input_iterator.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VariantInputIteratorData::sample_names
std::vector< std::string > sample_names
Sample names, for example as found in the file header.
Definition: variant_input_iterator.hpp:101
genesis::population::make_variant_input_iterator_from_individual_vcf_file
VariantInputIterator make_variant_input_iterator_from_individual_vcf_file(std::string const &filename, bool use_allelic_depth, bool only_biallelic, bool only_filter_pass)
Create a VariantInputIterator to iterate the contents of a VCF file as Variants, treating each sample...
Definition: variant_input_iterator.cpp:456
genesis::population::make_variant_input_iterator_from_sam_file
VariantInputIterator make_variant_input_iterator_from_sam_file(std::string const &filename, SamVariantInputIterator const &reader)
Create a VariantInputIterator to iterate the contents of a SAM/BAM/CRAM file as Variants.
Definition: variant_input_iterator.cpp:129
vcf_input_iterator.hpp
variant.hpp
genesis::population::VariantInputIterator
utils::LambdaIterator< Variant, VariantInputIteratorData > VariantInputIterator
Iterate Variants, using a variety of input file formats.
Definition: variant_input_iterator.hpp:124
sync_input_iterator.hpp
genesis::population::make_variant_input_iterator_from_pool_vcf_file
VariantInputIterator make_variant_input_iterator_from_pool_vcf_file(std::string const &filename, bool only_biallelic, bool only_filter_pass)
Create a VariantInputIterator to iterate the contents of a VCF file as Variants, treating each sample...
Definition: variant_input_iterator.cpp:432
genesis::population::make_variant_input_iterator_from_pileup_file
VariantInputIterator make_variant_input_iterator_from_pileup_file(std::string const &filename, SimplePileupReader const &reader)
Create a VariantInputIterator to iterate the contents of a (m)pileup file as Variants.
Definition: variant_input_iterator.cpp:237
lambda_iterator.hpp
genesis::population::make_variant_input_iterator_from_variant_parallel_input_iterator
VariantInputIterator make_variant_input_iterator_from_variant_parallel_input_iterator(VariantParallelInputIterator const &parallel_input, bool allow_ref_base_mismatches, bool allow_alt_base_mismatches, std::string const &source_sample_separator)
Create a VariantInputIterator to iterate multiple input sources at once, using a VariantParallelInput...
Definition: variant_input_iterator.cpp:488
genesis::population::VariantInputIteratorData
Data storage for input-specific information when traversing a variant file.
Definition: variant_input_iterator.hpp:80