A library for working with phylogenetic and population genetic data.
v0.32.0
variant_input_stream_sources.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
40 
41 #include <algorithm>
42 #include <cassert>
43 #include <cstdint>
44 #include <memory>
45 #include <stdexcept>
46 #include <unordered_set>
47 #include <utility>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Local Helpers
54 // =================================================================================================
55 
66 template<class T, class R>
68  std::string const& filename,
69  R const& reader,
70  std::vector<size_t> const& sample_indices,
71  bool inverse_sample_indices,
72  std::vector<bool> const& sample_filter
73 ) {
74  // Prepare a reader. We switch depending on the type of filter.
75  // Not both can be given by the way that this function is called; assert that.
76  assert( sample_indices.empty() || sample_filter.empty() );
77 
78  std::shared_ptr<T> input;
79  if( ! sample_indices.empty() ) {
80 
81  // When we have indices given, we need to open the file once to get the number of samples
82  // in the file, then create our correctly sized bool vector, and then open the file again
83  // to start iterating with the filter. Cumbersome, but an unfortunate detail of the
84  // current implementation. Might need fixing later.
85  input = std::make_shared<T>( utils::from_file( filename ), reader );
86  auto const smp_cnt = (*input)->samples.size();
87 
88  // Make the filter. We check the condition that the make function checks here as well,
89  // as the error message is otherwise not helpful for users. See there for details.
90  auto max_it = std::max_element( sample_indices.begin(), sample_indices.end() );
91  if( *max_it + 1 > smp_cnt ) {
92  throw std::invalid_argument(
93  "In " + filename + ": "
94  "Cannot create sample filter for the input file, as the filter index list contains "
95  "entries for " + std::to_string( *max_it + 1 ) + " samples, "
96  "while the input file only contains " + std::to_string( smp_cnt ) + " samples."
97  );
98  }
99 
100  // Now make a bool filter, inverse as needed, and restart the file with it.
101  auto smp_flt = utils::make_bool_vector_from_indices( sample_indices, smp_cnt );
102  if( inverse_sample_indices ) {
103  smp_flt.flip();
104  }
105  input = std::make_shared<T>(
106  utils::from_file( filename ),
107  smp_flt,
108  reader
109  );
110 
111  } else if( ! sample_filter.empty() ) {
112  input = std::make_shared<T>(
113  utils::from_file( filename ),
114  sample_filter,
115  reader
116  );
117  } else {
118  input = std::make_shared<T>(
119  utils::from_file( filename ),
120  reader
121  );
122  }
123  assert( input );
124  return input;
125 }
126 
/**
 * @brief Local helper to fill the sample names of file formats without sample names.
 *
 * Produces @p size names of the form `<source_name>.<k>`, where `k` counts upwards from 1.
 *
 * @param source_name Name of the input source, used as the prefix of every sample name.
 * @param size        Number of sample names to create.
 * @return            List with @p size entries; empty if @p size is zero.
 */
std::vector<std::string> make_sample_name_list_( std::string const& source_name, size_t size )
{
    // The "<source_name>." prefix is the same for every entry, so we build it only once
    // instead of concatenating it anew in each iteration.
    std::string const prefix = source_name + ".";

    std::vector<std::string> result;
    result.reserve( size );
    for( size_t i = 0; i < size; ++i ) {
        // Sample numbering shown to the user is one-based.
        result.push_back( prefix + std::to_string( i + 1 ));
    }
    return result;
}
142 
143 // =================================================================================================
144 // vector
145 // =================================================================================================
146 
148  std::vector<Variant> const& variants
149 ) {
150 
151  // Prepare the iterator data.
153  data.source_name = "std::vector";
154 
155  // No sample names in a vector... so we just use numbered entries.
156  if( ! variants.empty() ) {
157  data.sample_names = make_sample_name_list_( data.source_name, variants[0].samples.size() );
158  }
159 
160  // Get iterators to the data.
161  auto cur = variants.begin();
162  auto end = variants.end();
163 
164  // The iterators are copied over to the lambda,
165  // and those copies are kept alive when returning from this function.
166  return VariantInputStream(
167  [ cur, end ]( Variant& variant ) mutable {
168  if( cur != end ) {
169  // We make copies of the data here, as we do not want to modify the vector.
170  variant = *cur;
171  ++cur;
172  return true;
173  } else {
174  return false;
175  }
176  },
177  std::move( data )
178  );
179 }
180 
181 // =================================================================================================
182 // SAM/BAM/CRAM
183 // =================================================================================================
184 
185 // Only available if compiled with htslib
186 #ifdef GENESIS_HTSLIB
187 
189  std::string const& filename,
190  SamVariantInputStream const& reader
191 ) {
192  // Make an iterator over sam/bam/cram, using the given reader to take over its settings.
193  // We wrap this in a shared pointer so that this very instance can stay alive
194  // when being copied over to the lambda that we return from this function.
195  auto input = std::make_shared<SamVariantInputStream>( reader );
196  input->input_file( filename );
197 
198  // Get the iterators. We store them by copy in the lambda, and these copies are kept alive
199  // when returning from this function. Unfortunately, at the moment, we need to open the file
200  // here already and start the iteration, as we need access to some information from the file
201  // content itself, namely, the rg tags. The way that the sam reading is currently implemented,
202  // we have to do this. Might fix in the future, to avoid starting the iteration here already.
203  assert( input );
204  auto cur = input->begin();
205  auto end = input->end();
206 
207  // Get the data, using the file base name without path and potential extensions as source.
209  data.file_path = filename;
210  data.source_name = utils::file_basename( filename, { ".sam", ".sam.gz", ".bam", ".cram" });
211 
212  // Get the sample names from the read group tags. If they are empty, because the reader
213  // was not set up to split by read group tags, we instead use an empty name, to indicate that
214  // there is one unnamed sample.
215  data.sample_names = cur.rg_tags();
216  if( data.sample_names.empty() ) {
217  // We could have an input file where we want to split by RG, but no RG are set in the
218  // header. When not using unaccounted RG, we would end up with no samples.
219  // Take this into account, and create as many empty (unnamed) samples as needed.
220  // This cannot be more than one though, as it can be the unaccounted or none,
221  // or, if we do not split by RG at all, just the one sample where every read ends up.
222  // data.sample_names = make_sample_name_list_( data.source_name, cur.sample_size() );
223  // assert( data.sample_names.size() <= 1 );
224 
225  // Scratch that. If we treat the file as a single sample anyway, we just use the file name
226  // as the sample name. Way more intuitive. Unfortunately, there is then the inconsistency
227  // in naming, but it's more in line with what e.g. the sync does if a header is provided.
228  assert( cur.sample_size() <= 1 );
229  if( cur.sample_size() == 1 ) {
230  data.sample_names = std::vector<std::string>{ data.source_name };
231  }
232  } else {
233  assert( reader.split_by_rg() == true );
234  }
235 
236  // The input is copied over to the lambda, and that copy is kept alive
237  // when returning from this function.
238  return VariantInputStream(
239  [ input, cur, end ]( Variant& variant ) mutable {
240  if( cur != end ) {
241  variant = std::move( *cur );
242  ++cur;
243  return true;
244  } else {
245  return false;
246  }
247  },
248  std::move( data )
249  );
250 }
251 
252 #endif // GENESIS_HTSLIB
253 
254 // =================================================================================================
255 // Pileup
256 // =================================================================================================
257 
262  std::string const& filename,
263  SimplePileupReader const& reader,
264  std::vector<size_t> const& sample_indices,
265  bool inverse_sample_indices,
266  std::vector<bool> const& sample_filter
267 ) {
268  // Get the input, taking care of the filters.
271  >(
272  filename, reader, sample_indices, inverse_sample_indices, sample_filter
273  );
274 
275  // Get the data, using the file base name without path and potential extensions as source.
277  data.file_path = filename;
278  data.source_name = utils::file_basename(
279  filename, { ".gz", ".plp", ".mplp", ".pileup", ".mpileup" }
280  );
281 
282  // No sample names in pileup, use numbers instead.
283  data.sample_names = make_sample_name_list_( data.source_name, (*input)->samples.size() );
284 
285  // The input is copied over to the lambda, and that copy is kept alive
286  // when returning from this function.
287  return VariantInputStream(
288  [ input ]( Variant& variant ) mutable -> bool {
289  auto& it = *input;
290  if( it ) {
291  variant = std::move( *it );
292  ++it;
293  return true;
294  } else {
295  return false;
296  }
297  },
298  std::move( data )
299  );
300 }
301 
303  std::string const& filename,
304  SimplePileupReader const& reader
305 ) {
307  filename, reader, std::vector<size_t>{}, false, std::vector<bool>{}
308  );
309 }
310 
312  std::string const& filename,
313  std::vector<size_t> const& sample_indices,
314  bool inverse_sample_indices,
315  SimplePileupReader const& reader
316 ) {
318  filename, reader, sample_indices, inverse_sample_indices, std::vector<bool>{}
319  );
320 }
321 
323  std::string const& filename,
324  std::vector<bool> const& sample_filter,
325  SimplePileupReader const& reader
326 ) {
328  filename, reader, std::vector<size_t>{}, false, sample_filter
329  );
330 }
331 
332 // =================================================================================================
333 // Sync
334 // =================================================================================================
335 
337  std::string const& filename,
338  std::vector<size_t> const& sample_indices,
339  bool inverse_sample_indices,
340  std::vector<bool> const& sample_filter
341 ) {
342  // Get the input, taking care of the filters. We use a default reader here,
343  // as sync currently does not have any settings that a reader would need to take care of.
346  >(
347  filename, SyncReader(), sample_indices, inverse_sample_indices, sample_filter
348  );
349  // auto input = std::make_shared<SyncInputStream>( utils::from_file( filename ));
350 
351  // Get the data, using the file base name without path and potential extensions as source.
353  data.file_path = filename;
354  data.source_name = utils::file_basename( filename, { ".gz", ".sync" });
355 
356  if( input->get_sample_names().size() > 0 ) {
357  // If we have sample names, using our ad-hoc extension, use these.
358  data.sample_names = input->get_sample_names();
359  } else {
360  // No sample names given, so we use numbers instead.
361  data.sample_names = make_sample_name_list_( data.source_name, (*input)->samples.size() );
362  }
363 
364  // The input is copied over to the lambda, and that copy is kept alive
365  // when returning from this function.
366  return VariantInputStream(
367  [ input ]( Variant& variant ) mutable {
368  auto& sync_it = *input;
369  if( sync_it ) {
370  variant = std::move( *sync_it );
371  ++sync_it;
372  return true;
373  } else {
374  return false;
375  }
376  },
377  std::move( data )
378  );
379 }
380 
382  std::string const& filename
383 ) {
385  filename, std::vector<size_t>{}, false, std::vector<bool>{}
386  );
387 }
388 
390  std::string const& filename,
391  std::vector<size_t> const& sample_indices,
392  bool inverse_sample_indices
393 ) {
395  filename, sample_indices, inverse_sample_indices, std::vector<bool>{}
396  );
397 }
398 
400  std::string const& filename,
401  std::vector<bool> const& sample_filter
402 ) {
404  filename, std::vector<size_t>{}, false, sample_filter
405  );
406 }
407 
408 // =================================================================================================
409 // Frequency Table
410 // =================================================================================================
411 
413  std::string const& filename,
414  char separator_char,
415  FrequencyTableInputStream const& reader
416 ) {
418  filename, std::vector<std::string>{}, false, separator_char, reader
419  );
420 }
421 
423  std::string const& filename,
424  std::vector<std::string> const& sample_names_filter,
425  bool inverse_sample_names_filter,
426  char separator_char,
427  FrequencyTableInputStream const& reader
428 ) {
429  // Make an iterator over the frequency table, using the given reader to take over its settings.
430  // We wrap this in a shared pointer so that this very instance can stay alive
431  // when being copied over to the lambda that we return from this function.
432  auto input = std::make_shared<FrequencyTableInputStream>( reader );
433  input->input_source( utils::from_file( filename ));
434  input->sample_names_filter(
435  std::unordered_set<std::string>( sample_names_filter.begin(), sample_names_filter.end() )
436  );
437  input->inverse_sample_names_filter( inverse_sample_names_filter );
438  input->separator_char( separator_char );
439 
440  // Get the iterators. We store them by copy in the lambda, and these copies are kept alive
441  // when returning from this function. Similar to the sam function above, we here already need
442  // to start the iteration, to have access to data from the file. Might fix in the future.
443  assert( input );
444  auto cur = input->begin();
445  auto end = input->end();
446 
447  // Get the data, using the file base name without path and potential extensions as source.
449  data.file_path = filename;
451  filename, { ".csv", ".csv.gz", ".tsv", ".tsv.gz", ".txt" }
452  );
453 
454  // Get the sample names from the iterator.
455  data.sample_names = cur.sample_names();
456 
457  // The input is copied over to the lambda, and that copy is kept alive
458  // when returning from this function.
459  return VariantInputStream(
460  [ input, cur, end ]( Variant& variant ) mutable {
461  if( cur != end ) {
462  // The deref operator of the iterator is const, so this falls back to copy ctor.
463  // We keep the move here though, in case we change this behaviour later,
464  // and allow to move from the iterator.
465  variant = std::move( *cur );
466  ++cur;
467  return true;
468  } else {
469  return false;
470  }
471  },
472  std::move( data )
473  );
474 }
475 
476 // =================================================================================================
477 // VCF
478 // =================================================================================================
479 
480 // Only available if compiled with htslib
481 #ifdef GENESIS_HTSLIB
482 
487  std::string const& filename,
488  VariantInputStreamFromVcfParams const& params,
489  bool pool_samples,
490  bool use_allelic_depth
491 ) {
492  // We do not expect order by default here. Just to keep it simple. If needed, activate again.
493  const bool expect_ordered = false;
494 
495  // Make an iterator over vcf, and check that the necessary format field AD is present
496  // and of the correct form. We wrap this in a shared pointer so that this very instance
497  // can stay alive when being copied over to the lambda that we return from this function.
498  auto input = std::make_shared<VcfInputStream>(
499  filename, params.sample_names, params.inverse_sample_names, expect_ordered
500  );
501  if(
502  use_allelic_depth &&
503  ! input->header().has_format( "AD", VcfValueType::kInteger, VcfValueSpecial::kReference )
504  ) {
505  throw std::runtime_error(
506  "Cannot iterate over VCF file " + filename + " using the \"AD\" FORMAT " +
507  "field to count allelic depths, as that field is not part of the VCF file."
508  );
509  }
510 
511  // Get the data, using the file base name without path and potential extensions as source.
513  data.file_path = filename;
514  data.source_name = utils::file_basename( filename, { ".gz", ".vcf", ".bcf" });
515  data.sample_names = input->header().get_sample_names();
516 
517  // The input is copied over to the lambda, and that copy is kept alive
518  // when returning from this function.
519  return VariantInputStream(
520  [ input, pool_samples, use_allelic_depth, params ]
521  ( Variant& variant ) mutable {
522  auto& vcf_it = *input;
523 
524  // Only use the lines that have the "AD" field (if needed), and fit the other criteria.
525  // If any test fails, skip this position. In particular, we need to skip everything
526  // that is not a single change (contains ref or alt that is not a single nucleotide)
527  // when working with Pool data.
528  for( ; vcf_it; ++vcf_it ) {
529  if( use_allelic_depth && ! vcf_it->has_format( "AD" ) ) {
530  continue;
531  }
532  if( pool_samples && ! vcf_it->is_snp_or_alt_del() ) {
533  continue;
534  }
535  if( params.only_snps && ! vcf_it->is_snp() ) {
536  continue;
537  }
538  if( params.only_biallelic_snps && vcf_it->get_alternatives_count() != 1 ) {
539  continue;
540  }
541  if( params.only_filter_pass && ! vcf_it->pass_filter() ) {
542  continue;
543  }
544  break;
545  }
546 
547  // Now we are either at a record that fits our needs, or at the end of the input.
548  if( vcf_it ) {
549  assert( ! use_allelic_depth || vcf_it->has_format( "AD" ) );
550  assert( ! params.only_snps || vcf_it->is_snp() );
551  assert( ! params.only_biallelic_snps || vcf_it->get_alternatives_count() == 1 );
552  assert( ! params.only_filter_pass || vcf_it->pass_filter() );
553 
554  // Depending on what type of conversion we want to do (which in turn depends on
555  // the wrapper function that this local function was called from), we switch
556  // between pools and individuals here.
557  if( pool_samples ) {
558  variant = convert_to_variant_as_pool( *vcf_it );
559  } else {
560  variant = convert_to_variant_as_individuals( *vcf_it, use_allelic_depth );
561  }
562 
563  // Set the filter tag, if needed. We need to use reset() here instead of set(),
564  // as the conversion functions might already set the filter status, but we want
565  // the filter setting here to have precedence over, e.g., missing data.
566  if( ! vcf_it->pass_filter() ) {
568  }
569 
570  // Move on to the next input, so that it is ready when this lambda is called again.
571  ++vcf_it;
572  return true;
573  } else {
574  // If we reached the end of the input, return false to signal this.
575  return false;
576  }
577  },
578  std::move( data )
579  );
580 }
581 
583  std::string const& filename,
584  VariantInputStreamFromVcfParams const& params
585 ) {
587  filename, params, true, true
588  );
589 }
590 
592  std::string const& filename,
593  VariantInputStreamFromVcfParams const& params,
594  bool use_allelic_depth
595 ) {
597  filename, params, false, use_allelic_depth
598  );
599 }
600 
601 #endif // GENESIS_HTSLIB
602 
603 } // namespace population
604 } // namespace genesis
genesis::population::VariantInputStreamFromVcfParams::only_snps
bool only_snps
Definition: variant_input_stream_sources.hpp:256
genesis::population::make_sample_name_list_
std::vector< std::string > make_sample_name_list_(std::string const &source_name, size_t size)
Local helper to fill the sample names of file formats without sample names.
Definition: variant_input_stream_sources.cpp:133
genesis::population::make_variant_input_stream_from_pileup_file_
VariantInputStream make_variant_input_stream_from_pileup_file_(std::string const &filename, SimplePileupReader const &reader, std::vector< size_t > const &sample_indices, bool inverse_sample_indices, std::vector< bool > const &sample_filter)
Local helper function that takes care of the three functions below.
Definition: variant_input_stream_sources.cpp:261
functions.hpp
fs.hpp
Provides functions for accessing the file system.
genesis::population::FrequencyTableInputStream
Iterate an input source and parse it as a table of allele frequencies or counts.
Definition: frequency_table_input_stream.hpp:79
genesis::population::VariantInputStreamFromVcfParams::only_filter_pass
bool only_filter_pass
Definition: variant_input_stream_sources.hpp:258
helper.hpp
genesis::population::SimplePileupReader
Reader for line-by-line assessment of (m)pileup files.
Definition: simple_pileup_reader.hpp:78
genesis::utils::from_file
std::shared_ptr< BaseInputSource > from_file(std::string const &file_name, bool detect_compression=true)
Obtain an input source for reading from a file.
Definition: input_source.hpp:68
genesis::population::VariantFilterTag::kNotPassed
@ kNotPassed
Generic indicator that the Variant has not passed a filter.
genesis::population::SamVariantInputStream
Input stream for SAM/BAM/CRAM files that produces a Variant per genome position.
Definition: sam_variant_input_stream.hpp:103
genesis::population::VariantInputStreamFromVcfParams::sample_names
std::vector< std::string > sample_names
Definition: variant_input_stream_sources.hpp:261
genesis::population::make_variant_input_stream_from_sam_file
VariantInputStream make_variant_input_stream_from_sam_file(std::string const &filename, SamVariantInputStream const &reader)
Create a VariantInputStream to iterate the contents of a SAM/BAM/CRAM file as Variants.
Definition: variant_input_stream_sources.cpp:188
variant_input_stream_sources.hpp
genesis::population::make_variant_input_stream_from_sync_file
VariantInputStream make_variant_input_stream_from_sync_file(std::string const &filename)
Create a VariantInputStream to iterate the contents of a PoPoolation2 sync file as Variants.
Definition: variant_input_stream_sources.cpp:381
genesis::population::make_variant_input_stream_from_vcf_file_
VariantInputStream make_variant_input_stream_from_vcf_file_(std::string const &filename, VariantInputStreamFromVcfParams const &params, bool pool_samples, bool use_allelic_depth)
Local helper function that takes care of both main functions below.
Definition: variant_input_stream_sources.cpp:486
genesis::population::VariantInputStreamData::source_name
std::string source_name
User-readable name of the input source.
Definition: stream/variant_input_stream.hpp:73
genesis::population::convert_to_variant_as_individuals
Variant convert_to_variant_as_individuals(VcfRecord const &record, bool use_allelic_depth)
Convert a VcfRecord to a Variant, treating each sample as an individual, and combining them all into ...
Definition: vcf_common.cpp:453
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
sample_counts_filter.hpp
genesis::population::VcfValueSpecial::kReference
@ kReference
string.hpp
Provides some commonly used string utility functions.
genesis::population::SimplePileupInputStream
Iterate an input source and parse it as a (m)pileup file.
Definition: simple_pileup_input_stream.hpp:79
genesis::population::make_variant_input_stream_from_vector
VariantInputStream make_variant_input_stream_from_vector(std::vector< Variant > const &variants)
Create a VariantInputStream to iterate the contents of std::vector containing Variants.
Definition: variant_input_stream_sources.cpp:147
logging.hpp
Provides easy and fast logging functionality.
genesis::population::make_variant_input_stream_from_sync_file_
VariantInputStream make_variant_input_stream_from_sync_file_(std::string const &filename, std::vector< size_t > const &sample_indices, bool inverse_sample_indices, std::vector< bool > const &sample_filter)
Definition: variant_input_stream_sources.cpp:336
genesis::population::VcfValueType::kInteger
@ kInteger
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis::population::FilterStatus::reset
void reset()
Definition: filter_status.hpp:117
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VariantInputStreamFromVcfParams
Parameters to use when streaming through a VCF file as Variants.
Definition: variant_input_stream_sources.hpp:253
genesis::population::SamVariantInputStream::split_by_rg
bool split_by_rg() const
Definition: sam_variant_input_stream.hpp:656
genesis::population::make_variant_input_stream_from_pileup_file
VariantInputStream make_variant_input_stream_from_pileup_file(std::string const &filename, SimplePileupReader const &reader)
Create a VariantInputStream to iterate the contents of a (m)pileup file as Variants.
Definition: variant_input_stream_sources.cpp:302
genesis::population::VariantInputStreamData
Data storage for input-specific information when traversing a variant file.
Definition: stream/variant_input_stream.hpp:61
genesis::population::make_variant_input_stream_from_pool_vcf_file
VariantInputStream make_variant_input_stream_from_pool_vcf_file(std::string const &filename, VariantInputStreamFromVcfParams const &params)
Create a VariantInputStream to iterate the contents of a VCF file as Variants, treating each sample a...
Definition: variant_input_stream_sources.cpp:582
genesis::population::VariantInputStreamFromVcfParams::inverse_sample_names
bool inverse_sample_names
Definition: variant_input_stream_sources.hpp:262
variant_filter.hpp
genesis::population::VariantInputStreamData::sample_names
std::vector< std::string > sample_names
Sample names, for example as found in the file header.
Definition: stream/variant_input_stream.hpp:85
genesis::population::SyncInputStream
Iterate an input source and parse it as a sync file.
Definition: sync_input_stream.hpp:74
genesis::population::VariantInputStream
utils::GenericInputStream< Variant, VariantInputStreamData > VariantInputStream
Iterate Variants, using a variety of input file formats.
Definition: stream/variant_input_stream.hpp:108
genesis::utils::file_basename
std::string file_basename(std::string const &filename)
Remove directory name from file name if present.
Definition: fs.cpp:788
genesis::population::make_input_stream_with_sample_filter_
std::shared_ptr< T > make_input_stream_with_sample_filter_(std::string const &filename, R const &reader, std::vector< size_t > const &sample_indices, bool inverse_sample_indices, std::vector< bool > const &sample_filter)
Local helper function template that takes care of initializing an input stream, and setting the sample ...
Definition: variant_input_stream_sources.cpp:67
genesis::utils::make_bool_vector_from_indices
std::vector< bool > make_bool_vector_from_indices(std::vector< size_t > const &indices, size_t size)
Helper function to create a bool vector from a set of indices to be set to true.
Definition: utils/math/bitvector/helper.cpp:45
genesis::population::make_variant_input_stream_from_individual_vcf_file
VariantInputStream make_variant_input_stream_from_individual_vcf_file(std::string const &filename, VariantInputStreamFromVcfParams const &params, bool use_allelic_depth)
Create a VariantInputStream to iterate the contents of a VCF file as Variants, treating each sample a...
Definition: variant_input_stream_sources.cpp:591
genesis::population::VariantInputStreamData::file_path
std::string file_path
Full file path, when reading from a file.
Definition: stream/variant_input_stream.hpp:66
genesis::population::SyncReader
Reader for PoPoolation2's "synchronized" files.
Definition: sync_reader.hpp:92
genesis::utils::GenericInputStream
Type erasure for iterators, using std::function to eliminate the underlying input type.
Definition: generic_input_stream.hpp:163
genesis::population::convert_to_variant_as_pool
Variant convert_to_variant_as_pool(VcfRecord const &record)
Convert a VcfRecord to a Variant, treating each sample column as a pool of individuals.
Definition: vcf_common.cpp:393
genesis::population::make_variant_input_stream_from_frequency_table_file
VariantInputStream make_variant_input_stream_from_frequency_table_file(std::string const &filename, char separator_char, FrequencyTableInputStream const &reader)
Create a VariantInputStream to iterate the contents of a frequency table file as Variants.
Definition: variant_input_stream_sources.cpp:412
genesis::population::VariantInputStreamFromVcfParams::only_biallelic_snps
bool only_biallelic_snps
Definition: variant_input_stream_sources.hpp:257
genesis::population::Variant::status
FilterStatus status
Definition: variant.hpp:76