A library for working with phylogenetic data.
v0.25.0
vcf_window.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_VCF_WINDOW_H_
2 #define GENESIS_POPULATION_WINDOW_VCF_WINDOW_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #ifdef GENESIS_HTSLIB
35 
36 #include <cassert>
37 #include <functional>
38 #include <string>
39 #include <vector>
40 
45 
46 namespace genesis {
47 namespace population {
48 
49 // =================================================================================================
50 // VCF Window Generator
51 // =================================================================================================
52 
69 template<class Data, class Accumulator = EmptyAccumulator>
72  std::string const& vcf_file,
73  std::function<Data( VcfRecord const& )> conversion,
74  std::function<bool( VcfRecord const& )> condition = {}
75 ) {
76  size_t current_chr_len = 0;
77  for( auto record = VcfInputIterator( vcf_file ); record; ++record ) {
78 
79  // Check if we want to process this record at all.
80  if( condition && ! condition( *record )) {
81  continue;
82  }
83 
84  // Get the chromosome that this record belongs to.
85  auto const rec_chromosome = record->get_chromosome();
86 
87  // We need to take care of changes of the chromosome, that is, either at the very
88  // beginning of the VCF file, or at actual changes within the file.
89  // When finishing a chromosome, we want to use its proper length from the VCF header.
90  if( rec_chromosome != generator.chromosome() ) {
91 
92  // We store the length, and also make sure that we get the same value via two
93  // different methods. Safe is safe.
94  current_chr_len = record->header().get_chromosome_length( rec_chromosome );
95  assert(
96  record->header().get_chromosome_values( rec_chromosome ).count("length") == 0 ||
97  std::stoul(
98  record->header().get_chromosome_values( rec_chromosome ).at("length")
99  ) == current_chr_len
100  );
101 
102  // If there has been data enqueued before, this is not the first data point of the
103  // chromosome. In that case, we need to finish the previous chromosome first,
104  // and do so using its length to get the full interval covered and closed.
105  if( ! generator.empty() ) {
106 
107  // We might not have usable chromosome length information in the header.
108  // In that case, simply finish the current chromosome where it is.
109  // (As of now, finish_chromosome uses 0 as default value anyway, so we could just
110  // call it directly without the extra condition here. But this way seems a bit
111  // more future proof, if we need to change this and use overloads of
112  // finish_chromosome instead.)
113  if( current_chr_len == 0 ) {
114  generator.finish_chromosome();
115  } else {
116  generator.finish_chromosome( current_chr_len );
117  }
118  }
119 
120  } else {
121  // If we did not change chromosomes, we expect their length not to change.
122  assert( record->header().get_chromosome_length( rec_chromosome ) == current_chr_len );
123  }
124 
125  // Finally, enqueue the new data.
126  generator.enqueue( rec_chromosome, record->get_position(), conversion( *record ));
127  }
128 
129  // Now that we are done with the whole file, we also need to finish and close the
130  // last remaining chromosome interval properly. Same as above.
131  if( current_chr_len == 0 ) {
132  generator.finish_chromosome();
133  } else {
134  generator.finish_chromosome( current_chr_len );
135  }
136 }
137 
138 } // namespace population
139 } // namespace genesis
140 
141 #endif // htslib guard
142 #endif // include guard
genesis::population::SlidingWindowGenerator::chromosome
std::string const & chromosome() const
Get the chromosome name that we are currently processing.
Definition: sliding_window_generator.hpp:358
genesis::population::SlidingWindowGenerator::finish_chromosome
void finish_chromosome(size_t last_position=0)
Explicitly finish a chromosome, and emit all remaining Windows.
Definition: sliding_window_generator.hpp:561
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SlidingWindowGenerator::empty
bool empty() const
Return whether the instance is empty.
Definition: sliding_window_generator.hpp:371
genesis::population::SlidingWindowGenerator::enqueue
void enqueue(std::string const &chromosome, size_t position, Data const &data)
Enqueue a new Data value.
Definition: sliding_window_generator.hpp:484
window.hpp
genesis::population::run_vcf_window
void run_vcf_window(SlidingWindowGenerator< Data, Accumulator > &generator, std::string const &vcf_file, std::function< Data(VcfRecord const &)> conversion, std::function< bool(VcfRecord const &)> condition={})
Convenience function to iterate over a whole VCF file.
Definition: vcf_window.hpp:70
vcf_input_iterator.hpp
genesis::population::VcfRecord
Capture the information of a single SNP/variant line in a VCF/BCF file.
Definition: vcf_record.hpp:107
vcf_record.hpp
sliding_window_generator.hpp
genesis::population::SlidingWindowGenerator
Generator for sliding Windows over the chromosomes of a genome.
Definition: sliding_window_generator.hpp:124