A library for working with phylogenetic and population genetic data.
v0.32.0
frequency_table_input_stream.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_FREQUENCY_TABLE_INPUT_STREAM_H_
2 #define GENESIS_POPULATION_FORMAT_FREQUENCY_TABLE_INPUT_STREAM_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
35 
39 
40 #include <functional>
41 #include <limits>
42 #include <memory>
43 #include <string>
44 #include <unordered_map>
45 #include <unordered_set>
46 #include <utility>
47 #include <vector>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Frequency Table Input Stream
54 // =================================================================================================
55 
80 {
81 public:
82 
83  // -------------------------------------------------------------------------
84  // Member Types
85  // -------------------------------------------------------------------------
86 
89  using pointer = value_type const*;
90  using reference = value_type const&;
91  using difference_type = std::ptrdiff_t;
92  using iterator_category = std::input_iterator_tag;
93 
94  // ======================================================================================
95  // Internal Iterator
96  // ======================================================================================
97 
98 public:
99 
107  class Iterator
108  {
109  // -------------------------------------------------------------------------
110  // Typedefs and Enums
111  // -------------------------------------------------------------------------
112 
113  public:
114 
117  using pointer = value_type const*;
118  using reference = value_type const&;
119  using iterator_category = std::input_iterator_tag;
120 
121  private:
122 
123  // When parsing the header line, we need to keep track of what information is present.
124  // For this, we use information per sample, as well as about the constant fields.
125 
126  // Per sample information, to make sure that we can actually process a sample.
127  // Do we have ref and alt counts? Do we have frequency and/or read depth?
128  struct SampleInfo
129  {
130  size_t index = std::numeric_limits<size_t>::max();
131  bool has_ref = false;
132  bool has_alt = false;
133  bool has_frq = false;
134  bool has_cov = false;
135  };
136 
137  // All column info, to make sure that we have all the data needed before processing.
138  // Do we have the chromosome and position column? Do we have the ref and alt base column?
139  // Which samples are present, and which data do they contain?
140  struct HeaderInfo
141  {
142  // We need to make sure that we only see each data type once.
143  bool has_chr = false;
144  bool has_pos = false;
145  bool has_ref = false;
146  bool has_alt = false;
147 
148  // We make a list of all sample names that we have seen, noting which type
149  // of field we found, to make sure that we have all relevant ones. The order of the
150  // indices is the order in which the SampleCounts in the final Variant are produced.
151  // Shall be accessed through get_sample_info_() to ensure correct setup of the index.
152  std::unordered_map<std::string, SampleInfo> sample_infos;
153  };
154 
155  // Store the data that is parsed per sample, before processing it into our final Variant.
156  struct SampleData
157  {
158  bool is_missing = false;
159  size_t ref_cnt = 0;
160  size_t alt_cnt = 0;
161  size_t cov = 0;
162  double frq = 0.0;
163  };
164 
165  // -------------------------------------------------------------------------
166  // Constructors and Rule of Five
167  // -------------------------------------------------------------------------
168 
169  private:
170 
171  Iterator() = default;
172  Iterator( FrequencyTableInputStream const* parent )
173  : parent_( parent )
174  , input_stream_( std::make_shared<utils::InputStream>( parent_->input_source_ ))
175  , sample_data_( std::make_shared<std::vector<SampleData>>() )
176  , current_variant_( std::make_shared<Variant>() )
177  {
178  // Safeguard
179  if( ! parent_ ) {
180  return;
181  }
182 
183  // We need a manually constructored shared ptr, as this is private,
184  // and make_shared does not have access to this. Hm nope, seems to work?!
185  // sample_data_ = std::shared_ptr<std::vector<SampleData>>( new std::vector<SampleData>() );
186 
187  // Start streaming the data
188  parse_header_();
189  increment_();
190  }
191 
192  public:
193 
194  ~Iterator() = default;
195 
196  Iterator( self_type const& ) = default;
197  Iterator( self_type&& ) = default;
198 
199  Iterator& operator= ( self_type const& ) = default;
200  Iterator& operator= ( self_type&& ) = default;
201 
203 
204  // -------------------------------------------------------------------------
205  // Accessors
206  // -------------------------------------------------------------------------
207 
208  value_type const* operator->() const
209  {
210  assert( current_variant_ );
211  return &*current_variant_;
212  }
213 
214  // value_type* operator->()
215  // {
216  // return &current_variant_;
217  // }
218 
219  value_type const& operator*() const
220  {
221  assert( current_variant_ );
222  return *current_variant_;
223  }
224 
225  // value_type& operator*()
226  // {
227  // return current_variant_;
228  // }
229 
230  // -------------------------------------------------------------------------
231  // Iteration
232  // -------------------------------------------------------------------------
233 
235  {
236  increment_();
237  return *this;
238  }
239 
240  // self_type operator ++(int)
241  // {
242  // auto cpy = *this;
243  // increment_();
244  // return cpy;
245  // }
246 
256  bool operator==( self_type const& it ) const
257  {
258  return parent_ == it.parent_;
259  }
260 
261  bool operator!=( self_type const& it ) const
262  {
263  return !(*this == it);
264  }
265 
266  // -------------------------------------------------------------------------
267  // Data Access
268  // -------------------------------------------------------------------------
269 
274  std::vector<std::string> sample_names() const;
275 
276  // -------------------------------------------------------------------------
277  // Internal Members
278  // -------------------------------------------------------------------------
279 
280  private:
281 
282  // ---------------------------------------------
283  // Init and Header Parsing
284  // ---------------------------------------------
285 
286  void parse_header_();
287 
288  void check_header_fields_(
289  std::unordered_set<std::string> const& all_samplenames
290  ) const;
291 
292  // ---------------------------------------------
293  // Parse Header Fields
294  // ---------------------------------------------
295 
296  void parse_header_field_(
297  std::string const& field,
298  std::unordered_set<std::string>& all_samplenames
299  );
300 
// Evaluate a header field against the fixed (non-sample) columns: chromosome (chr),
// position (pos), reference base (ref), and alternative base (alt).
// Each function receives the text of a single header field.
// NOTE(review): the meaning of the int return value is not visible in this header;
// presumably it signals whether the field matched - confirm against the implementation.
301  int evaluate_if_field_is_chr_( std::string const& field );
302  int evaluate_if_field_is_pos_( std::string const& field );
303  int evaluate_if_field_is_ref_( std::string const& field );
304  int evaluate_if_field_is_alt_( std::string const& field );
305 
// Evaluate a header field against the per-sample columns: reference count (ref),
// alternative count (alt), frequency (frq), and read depth (cov).
// Each function receives the text of a single header field, and the set of sample names
// by non-const reference.
// NOTE(review): presumably the set is extended by the sample name found in the field, and
// the int return value signals whether the field matched - neither is visible in this
// header; confirm against the implementation.
306  int evaluate_if_field_is_sample_ref_(
307  std::string const& field,
308  std::unordered_set<std::string>& all_samplenames
309  );
310  int evaluate_if_field_is_sample_alt_(
311  std::string const& field,
312  std::unordered_set<std::string>& all_samplenames
313  );
314  int evaluate_if_field_is_sample_frq_(
315  std::string const& field,
316  std::unordered_set<std::string>& all_samplenames
317  );
318  int evaluate_if_field_is_sample_cov_(
319  std::string const& field,
320  std::unordered_set<std::string>& all_samplenames
321  );
322 
323  // ---------------------------------------------
324  // Sample Helpers
325  // ---------------------------------------------
326 
327  SampleInfo& get_sample_info_( std::string const& samplename );
328  bool is_ignored_sample_( std::string const& samplename ) const;
329  static bool parse_if_missing_(
330  FrequencyTableInputStream const* parent,
331  genesis::utils::InputStream& input_stream
332  );
333 
334  // ---------------------------------------------
335  // String Matching Helpers
336  // ---------------------------------------------
337 
338  bool match_header_field_(
339  std::string const& field,
340  std::string const& user_string,
341  std::vector<std::string> const& predefined_list
342  ) const;
343 
344  bool match_header_sample_(
345  std::string const& field,
346  std::string const& user_substring,
347  std::vector<std::string> const& predefined_list,
348  std::string& samplename
349  ) const;
350 
351  bool match_header_sample_(
352  std::string const& field,
353  std::string const& user_substring,
354  std::vector<std::string> const& predefined_list1,
355  std::vector<std::string> const& predefined_list2,
356  std::string& samplename
357  ) const;
358 
359  bool match_header_sample_user_partial_(
360  std::string const& field,
361  std::string const& substring,
362  std::string& samplename
363  ) const;
364 
365  bool match_header_sample_predefined_partial_(
366  std::string const& field,
367  std::string const& substring,
368  std::string& samplename
369  ) const;
370 
371  // ---------------------------------------------
372  // Increment and Processing Samples
373  // ---------------------------------------------
374 
375  void increment_();
376 
377  void process_sample_data_(
378  SampleInfo const& sample_info,
379  SampleData const& sample_data,
380  Variant& variant,
381  size_t sample_index
382  );
383 
384  // -------------------------------------------------------------------------
385  // Data Members
386  // -------------------------------------------------------------------------
387 
388  private:
389 
390  // Parent. If null, this indicates the end of the input and that we are done iterating.
391  FrequencyTableInputStream const* parent_ = nullptr;
392 
393  // Data stream to read from.
394  std::shared_ptr<utils::InputStream> input_stream_;
395 
396  // We keep information about the header, and which samples there are.
397  // This also stores which sample has which index, so that we can access them in the Variant.
398  HeaderInfo header_info_;
399 
400  // At the beginning, we guess which columns contain which information, based on their
401  // headers. In order to keep the actual parsing as fast as possible, we use a lambda
402  // for each column that is set up in the beginning. This avoids checking types
403  // of columns for each row over and over again - we simply need to call all processors
404  // in order, across the input file line.
405  std::vector<std::function<void(genesis::utils::InputStream&)>> column_processors_;
406 
407  // We use a set of sample data objects to buffer values in, using a pointer to keep its
408  // address stable (as iterators might be copied or moved implicitly while setting up
409  // a for loop for instance). This is accessed via lambdas that capture the shared pointer.
410  // This data is used to process them into the actual variant later.
411  std::shared_ptr<std::vector<SampleData>> sample_data_;
412 
413  // Same reasoning as above: Using a pointer to keep the address of the Variant stable.
414  // This is the final object that is exposed to the user.
415  std::shared_ptr<Variant> current_variant_;
416  };
417 
418  // ======================================================================================
419  // Main Class
420  // ======================================================================================
421 
422  // -------------------------------------------------------------------------
423  // Constructors and Rule of Five
424  // -------------------------------------------------------------------------
425 
431  FrequencyTableInputStream() = default;
432 
437  std::shared_ptr<utils::BaseInputSource> input_source
438  )
439  // Call the other constructor, to avoid code duplication.
440  : FrequencyTableInputStream( input_source, std::unordered_set<std::string>{}, false )
441  {}
442 
451  std::shared_ptr<utils::BaseInputSource> input_source,
452  std::unordered_set<std::string> const& sample_names_filter,
453  bool inverse_sample_names_filter = false
454  )
455  : input_source_( input_source )
456  , sample_names_filter_( sample_names_filter )
457  , inverse_sample_names_filter_( inverse_sample_names_filter )
458  {}
459 
460  ~FrequencyTableInputStream() = default;
461 
462  FrequencyTableInputStream( self_type const& ) = default;
463  FrequencyTableInputStream( self_type&& ) = default;
464 
465  self_type& operator= ( self_type const& ) = default;
466  self_type& operator= ( self_type&& ) = default;
467 
468  // -------------------------------------------------------------------------
469  // Iteration
470  // -------------------------------------------------------------------------
471 
472  Iterator begin() const
473  {
474  return Iterator( this );
475  }
476 
477  Iterator end() const
478  {
479  return Iterator();
480  }
481 
482  // -------------------------------------------------------------------------
483  // Basic Input Settings
484  // -------------------------------------------------------------------------
485 
486  std::shared_ptr<utils::BaseInputSource> input_source() const
487  {
488  return input_source_;
489  }
490 
497  self_type& input_source( std::shared_ptr<utils::BaseInputSource> value )
498  {
499  input_source_ = value;
500  return *this;
501  }
502 
503  std::unordered_set<std::string> const& sample_names_filter() const
504  {
505  return sample_names_filter_;
506  }
507 
515  self_type& sample_names_filter( std::unordered_set<std::string> const& value )
516  {
517  sample_names_filter_ = value;
518  return *this;
519  }
520 
522  {
523  return inverse_sample_names_filter_;
524  }
525 
533  {
534  inverse_sample_names_filter_ = value;
535  return *this;
536  }
537 
538  // -------------------------------------------------------------------------
539  // Header Parsing Settings
540  // -------------------------------------------------------------------------
541 
552  self_type& header_chromosome_string( std::string const& str )
553  {
554  usr_chr_name_ = str;
555  return *this;
556  }
557 
563  std::string const& header_chromosome_string() const
564  {
565  return usr_chr_name_;
566  }
567 
574  self_type& header_position_string( std::string const& str )
575  {
576  usr_pos_name_ = str;
577  return *this;
578  }
579 
585  std::string const& header_position_string() const
586  {
587  return usr_pos_name_;
588  }
589 
596  self_type& header_reference_base_string( std::string const& str )
597  {
598  usr_ref_name_ = str;
599  return *this;
600  }
601 
607  std::string const& header_reference_base_string() const
608  {
609  return usr_ref_name_;
610  }
611 
618  self_type& header_alternative_base_string( std::string const& str )
619  {
620  usr_alt_name_ = str;
621  return *this;
622  }
623 
629  std::string const& header_alternative_base_string() const
630  {
631  return usr_alt_name_;
632  }
633 
648  {
649  usr_smp_ref_name_ = str;
650  return *this;
651  }
652 
659  std::string const& header_sample_reference_count_substring() const
660  {
661  return usr_smp_ref_name_;
662  }
663 
673  {
674  usr_smp_alt_name_ = str;
675  return *this;
676  }
677 
684  std::string const& header_sample_alternative_count_substring() const
685  {
686  return usr_smp_alt_name_;
687  }
688 
697  self_type& header_sample_frequency_substring( std::string const& str )
698  {
699  usr_smp_frq_name_ = str;
700  return *this;
701  }
702 
709  std::string const& header_sample_frequency_substring() const
710  {
711  return usr_smp_frq_name_;
712  }
713 
724  {
725  usr_smp_cov_name_ = str;
726  return *this;
727  }
728 
735  std::string const& header_sample_read_depth_substring() const
736  {
737  return usr_smp_cov_name_;
738  }
739 
740  // -------------------------------------------------------------------------
741  // Settings
742  // -------------------------------------------------------------------------
743 
744  std::shared_ptr<::genesis::sequence::ReferenceGenome> reference_genome() const
745  {
746  return ref_genome_;
747  }
748 
768  self_type& reference_genome( std::shared_ptr<::genesis::sequence::ReferenceGenome> value )
769  {
770  ref_genome_ = value;
771  return *this;
772  }
773 
774  char separator_char() const
775  {
776  return separator_char_;
777  }
778 
784  self_type& separator_char( char value )
785  {
786  separator_char_ = value;
787  return *this;
788  }
789 
790  std::string const& missing_value() const
791  {
792  return usr_missing_;
793  }
794 
802  self_type& missing_value( std::string const& value )
803  {
804  usr_missing_ = value;
805  return *this;
806  }
807 
808  double int_factor() const
809  {
810  return int_factor_;
811  }
812 
838  self_type& int_factor( double value )
839  {
840  if(
841  value > max_int_factor_ ||
842  static_cast<double>( static_cast<SampleCounts::size_type>( value )) != value
843  ) {
844  throw std::runtime_error(
845  "Cannot set int_factor to " + std::to_string( value ) + " as this is out of range "
846  "of the int type used for storing base counts."
847  );
848  }
849  int_factor_ = value;
850  return *this;
851  }
852 
854  {
855  return allowed_rel_freq_error_;
856  }
857 
877  {
878  allowed_rel_freq_error_ = value;
879  return *this;
880  }
881 
882  bool frequency_is_ref() const
883  {
884  return frequency_is_ref_;
885  }
886 
895  {
896  frequency_is_ref_ = value;
897  return *this;
898  }
899 
900  // -------------------------------------------------------------------------
901  // Data Members
902  // -------------------------------------------------------------------------
903 
904 private:
905 
906  // Input data.
907  std::shared_ptr<utils::BaseInputSource> input_source_;
908  std::unordered_set<std::string> sample_names_filter_;
909  bool inverse_sample_names_filter_ = false;
910 
911  // Input settings.
912  std::shared_ptr<::genesis::sequence::ReferenceGenome> ref_genome_;
913  char separator_char_ = '\t';
914 
915  // Missing data indicators. We have a default set that we use, and a user-provided one.
916  // Both are used case-insensitively. They need to be sorted so that `nan` comes before `na`,
917  // as otherwise, `na` would already match when it is actually `nan`, leading to an error.
918  std::vector<std::string> missing_ = { ".", "nan", "na" };
919  std::string usr_missing_;
920 
921  // When reading frequencies, for now, we want to turn them into counts, as this is what
922  // our data infrastructure of Variant expects. To lose as little precision as possible, we
923  // multiply the frequency [ 0.0, 1.0 ] by the largest integer for which itself and all
924  // smaller integers can be stored in a double exactly. This guarantees that all frequencies
925  // are mapped into the largest double range that is as exact as we can manage here.
926  // Using these fake large counts also basically eliminates Bessel's correction for read depth
927  // that we have in some of our pop gen equations, as we do not want that when working with
928  // frequencies anyway, e.g., when obtaining them from HAF-pipe.
929  // See https://stackoverflow.com/q/1848700/4184258 for the exact double value used here.
930  static constexpr double max_int_factor_ = 9007199254740992.0;
931 
932  // We use a smaller factor by default, to make sure that we can add numbers without reaching
933  // the max int precision point of double, just in case. We are internally using size_t for
934  // counts, so that should not happen, but prevents accidents. Also, using a power of ten here
935  // makes numbers converted from data nicer for users and potentially easier to understand,
936  // as they more clearly correspond to the frequencies actually specified in the input data.
937  static constexpr double default_int_factor_ = 1000000.0;
938 
939  // Make sure that this actually fits into the SampleCounts values.
940  static_assert(
941  static_cast<double>( static_cast<SampleCounts::size_type>( max_int_factor_ )) == max_int_factor_,
942  "Numeric type for SampleCounts does not fit for FrequencyTableInputStream::max_int_factor_"
943  );
944 
945  // The above is the max that we can use, but we allow users to set the used int factor that is
946  // used for frequency-based computations, so that they can for example use the expected
947  // read depth information from HAF-pipe, or similar values instead.
948  // double int_factor_ = max_int_factor_;
949  double int_factor_ = default_int_factor_;
950 
951  // When we have multiple pieces of information for a sample, we do cross checks, to make
952  // sure that everything is in order. This relative value here is used for frequencies.
953  double allowed_rel_freq_error_ = 0.001;
954 
955  // What does the frequency mean? We use: true = ref, false = alt frequency.
956  // Probably would be more neat with an enum, but it's only used internally, so that's okay.
957  bool frequency_is_ref_ = true;
958 
959  // Default names for header fields in a csv file that typically describe
960  // the columns and quantities we are interested in. All used case-insensitively.
961  std::vector<std::string> chr_names_ = { "chromosome", "chrom", "chr", "contig" };
962  std::vector<std::string> pos_names_ = { "position", "pos" };
963  std::vector<std::string> ref_names_ = { "reference", "referencebase", "ref", "refbase" };
964  std::vector<std::string> alt_names_ = { "alternative", "alternativebase", "alt", "altbase" };
965  std::vector<std::string> cnt_names_ = { "counts", "count", "cnt", "ct" };
966  std::vector<std::string> frq_names_ = { "frequency", "freq", "maf", "af", "allelefrequency" };
967  std::vector<std::string> cov_names_ = { "coverage", "cov", "readdepth", "depth", "ad" };
968 
969  // User supplied overwrites for the above automatic terms.
970  // If either of them is given, we use those instead of the generic word lists.
971  std::string usr_chr_name_;
972  std::string usr_pos_name_;
973  std::string usr_ref_name_;
974  std::string usr_alt_name_;
975  std::string usr_smp_ref_name_;
976  std::string usr_smp_alt_name_;
977  std::string usr_smp_frq_name_;
978  std::string usr_smp_cov_name_;
979 
980 };
981 
982 } // namespace population
983 } // namespace genesis
984 
985 #endif // include guard
genesis::population::FrequencyTableInputStream::header_sample_frequency_substring
self_type & header_sample_frequency_substring(std::string const &str)
Specify a (sub)string that is the prefix or suffix for header columns containing the frequency of a s...
Definition: frequency_table_input_stream.hpp:697
genesis::population::FrequencyTableInputStream::header_sample_alternative_count_substring
std::string const & header_sample_alternative_count_substring() const
Return the currently set (sub)string that is the prefix or suffix for header columns containing the a...
Definition: frequency_table_input_stream.hpp:684
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::population::FrequencyTableInputStream::input_source
std::shared_ptr< utils::BaseInputSource > input_source() const
Definition: frequency_table_input_stream.hpp:486
genesis::population::FrequencyTableInputStream::int_factor
double int_factor() const
Definition: frequency_table_input_stream.hpp:808
genesis::population::FrequencyTableInputStream::begin
Iterator begin() const
Definition: frequency_table_input_stream.hpp:472
genesis::population::FrequencyTableInputStream::int_factor
self_type & int_factor(double value)
Set the factor by which frequencies are multiplied if no read depth information is present for a samp...
Definition: frequency_table_input_stream.hpp:838
genesis::population::FrequencyTableInputStream
Iterate an input source and parse it as a table of allele frequencies or counts.
Definition: frequency_table_input_stream.hpp:79
genesis::population::FrequencyTableInputStream::Iterator::operator++
self_type & operator++()
Definition: frequency_table_input_stream.hpp:234
genesis::population::FrequencyTableInputStream::header_sample_read_depth_substring
std::string const & header_sample_read_depth_substring() const
Return the currently set (sub)string that is the prefix or suffix for header columns containing the r...
Definition: frequency_table_input_stream.hpp:735
genesis::population::FrequencyTableInputStream::reference
value_type const & reference
Definition: frequency_table_input_stream.hpp:90
genesis::population::FrequencyTableInputStream::sample_names_filter
std::unordered_set< std::string > const & sample_names_filter() const
Definition: frequency_table_input_stream.hpp:503
genesis::population::FrequencyTableInputStream::FrequencyTableInputStream
FrequencyTableInputStream()=default
Create a default instance, with no input.
genesis::population::FrequencyTableInputStream::header_position_string
std::string const & header_position_string() const
Return the currently set string that marks the position column in the header.
Definition: frequency_table_input_stream.hpp:585
genesis::population::FrequencyTableInputStream::Iterator::operator=
Iterator & operator=(self_type const &)=default
genesis::population::FrequencyTableInputStream::Iterator::self_type
FrequencyTableInputStream::Iterator self_type
Definition: frequency_table_input_stream.hpp:115
genesis::population::FrequencyTableInputStream::Iterator::operator!=
bool operator!=(self_type const &it) const
Definition: frequency_table_input_stream.hpp:261
genesis::population::FrequencyTableInputStream::reference_genome
self_type & reference_genome(std::shared_ptr<::genesis::sequence::ReferenceGenome > value)
Reference genome used to phase input data without reference bases.
Definition: frequency_table_input_stream.hpp:768
genesis::population::FrequencyTableInputStream::FrequencyTableInputStream
FrequencyTableInputStream(std::shared_ptr< utils::BaseInputSource > input_source)
Create an instance that reads from an input_source.
Definition: frequency_table_input_stream.hpp:436
genesis::population::FrequencyTableInputStream::inverse_sample_names_filter
bool inverse_sample_names_filter() const
Definition: frequency_table_input_stream.hpp:521
genesis::population::FrequencyTableInputStream::header_reference_base_string
std::string const & header_reference_base_string() const
Return the currently set string that marks the reference base column in the header.
Definition: frequency_table_input_stream.hpp:607
genesis::population::FrequencyTableInputStream::operator=
self_type & operator=(self_type const &)=default
input_source.hpp
genesis::population::FrequencyTableInputStream::header_alternative_base_string
std::string const & header_alternative_base_string() const
Return the currently set string that marks the alternative base column in the header.
Definition: frequency_table_input_stream.hpp:629
genesis::population::FrequencyTableInputStream::FrequencyTableInputStream
FrequencyTableInputStream(std::shared_ptr< utils::BaseInputSource > input_source, std::unordered_set< std::string > const &sample_names_filter, bool inverse_sample_names_filter=false)
Create an instance that reads from an input_source.
Definition: frequency_table_input_stream.hpp:450
genesis::population::FrequencyTableInputStream::header_sample_alternative_count_substring
self_type & header_sample_alternative_count_substring(std::string const &str)
Specify a (sub)string that is the prefix or suffix for header columns containing the alternative base...
Definition: frequency_table_input_stream.hpp:672
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::population::FrequencyTableInputStream::header_chromosome_string
std::string const & header_chromosome_string() const
Return the currently set string that marks the chromosome column in the header.
Definition: frequency_table_input_stream.hpp:563
input_stream.hpp
genesis::population::FrequencyTableInputStream::difference_type
std::ptrdiff_t difference_type
Definition: frequency_table_input_stream.hpp:91
genesis::population::FrequencyTableInputStream::header_alternative_base_string
self_type & header_alternative_base_string(std::string const &str)
Specify a string that marks the alternative base column in the header.
Definition: frequency_table_input_stream.hpp:618
genesis::population::FrequencyTableInputStream::Iterator::operator==
bool operator==(self_type const &it) const
Compare two iterators for equality.
Definition: frequency_table_input_stream.hpp:256
genesis::population::FrequencyTableInputStream::sample_names_filter
self_type & sample_names_filter(std::unordered_set< std::string > const &value)
Set the sample names to filter for.
Definition: frequency_table_input_stream.hpp:515
genesis::population::FrequencyTableInputStream::header_sample_reference_count_substring
std::string const & header_sample_reference_count_substring() const
Return the currently set (sub)string that is the prefix or suffix for header columns containing the r...
Definition: frequency_table_input_stream.hpp:659
genesis::population::FrequencyTableInputStream::Iterator::FrequencyTableInputStream
friend FrequencyTableInputStream
Definition: frequency_table_input_stream.hpp:202
genesis::population::FrequencyTableInputStream::missing_value
std::string const & missing_value() const
Definition: frequency_table_input_stream.hpp:790
genesis::population::FrequencyTableInputStream::allowed_relative_frequency_error
double allowed_relative_frequency_error() const
Definition: frequency_table_input_stream.hpp:853
genesis::population::FrequencyTableInputStream::Iterator::reference
value_type const & reference
Definition: frequency_table_input_stream.hpp:118
genesis::population::FrequencyTableInputStream::separator_char
char separator_char() const
Definition: frequency_table_input_stream.hpp:774
genesis::population::FrequencyTableInputStream::Iterator::iterator_category
std::input_iterator_tag iterator_category
Definition: frequency_table_input_stream.hpp:119
genesis::population::FrequencyTableInputStream::~FrequencyTableInputStream
~FrequencyTableInputStream()=default
genesis::population::FrequencyTableInputStream::missing_value
self_type & missing_value(std::string const &value)
Set the string that indicates missing data.
Definition: frequency_table_input_stream.hpp:802
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis::population::FrequencyTableInputStream::header_reference_base_string
self_type & header_reference_base_string(std::string const &str)
Specify a string that marks the reference base column in the header.
Definition: frequency_table_input_stream.hpp:596
reference_genome.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::FrequencyTableInputStream::allowed_relative_frequency_error
self_type & allowed_relative_frequency_error(double value)
Allowed error margin for frequencies.
Definition: frequency_table_input_stream.hpp:876
genesis::population::FrequencyTableInputStream::pointer
value_type const * pointer
Definition: frequency_table_input_stream.hpp:89
genesis::population::FrequencyTableInputStream::input_source
self_type & input_source(std::shared_ptr< utils::BaseInputSource > value)
Set the input source.
Definition: frequency_table_input_stream.hpp:497
genesis::population::FrequencyTableInputStream::self_type
FrequencyTableInputStream self_type
Definition: frequency_table_input_stream.hpp:87
genesis::population::FrequencyTableInputStream::header_sample_reference_count_substring
self_type & header_sample_reference_count_substring(std::string const &str)
Specify a (sub)string that is the prefix or suffix for header columns containing the reference base c...
Definition: frequency_table_input_stream.hpp:647
genesis::population::FrequencyTableInputStream::reference_genome
std::shared_ptr<::genesis::sequence::ReferenceGenome > reference_genome() const
Definition: frequency_table_input_stream.hpp:744
genesis::population::FrequencyTableInputStream::Iterator::~Iterator
~Iterator()=default
genesis::population::FrequencyTableInputStream::header_position_string
self_type & header_position_string(std::string const &str)
Specify a string that marks the position column in the header.
Definition: frequency_table_input_stream.hpp:574
genesis::population::FrequencyTableInputStream::frequency_is_ref
bool frequency_is_ref() const
Definition: frequency_table_input_stream.hpp:882
genesis::population::FrequencyTableInputStream::Iterator::operator*
value_type const & operator*() const
Definition: frequency_table_input_stream.hpp:219
genesis::population::FrequencyTableInputStream::end
Iterator end() const
Definition: frequency_table_input_stream.hpp:477
genesis::population::FrequencyTableInputStream::Iterator::sample_names
std::vector< std::string > sample_names() const
Return the sample names found in the header, in the order in which they are in the Variant of each it...
Definition: frequency_table_input_stream.cpp:62
variant.hpp
genesis::population::FrequencyTableInputStream::header_chromosome_string
self_type & header_chromosome_string(std::string const &str)
Specify a string that marks the chromosome column in the header.
Definition: frequency_table_input_stream.hpp:552
genesis::population::FrequencyTableInputStream::iterator_category
std::input_iterator_tag iterator_category
Definition: frequency_table_input_stream.hpp:92
genesis::population::FrequencyTableInputStream::inverse_sample_names_filter
self_type & inverse_sample_names_filter(bool value)
Set whether to invert the sample names filter.
Definition: frequency_table_input_stream.hpp:532
genesis::population::FrequencyTableInputStream::separator_char
self_type & separator_char(char value)
Set the separator char used for parsing the tabular input data.
Definition: frequency_table_input_stream.hpp:784
genesis::population::FrequencyTableInputStream::Iterator
Iterator over loci of the input sources.
Definition: frequency_table_input_stream.hpp:107
genesis::population::FrequencyTableInputStream::header_sample_read_depth_substring
self_type & header_sample_read_depth_substring(std::string const &str)
Specify a (sub)string that is the prefix or suffix for header columns containing the read depth of a ...
Definition: frequency_table_input_stream.hpp:723
genesis::population::FrequencyTableInputStream::Iterator::pointer
value_type const * pointer
Definition: frequency_table_input_stream.hpp:117
genesis::population::SampleCounts::size_type
size_t size_type
Public alias for the size type that the class uses to store its counts.
Definition: sample_counts.hpp:61
genesis::population::FrequencyTableInputStream::frequency_is_ref
self_type & frequency_is_ref(bool value)
Set whether frequencies are ref or alt frequencies.
Definition: frequency_table_input_stream.hpp:894
genesis::population::FrequencyTableInputStream::header_sample_frequency_substring
std::string const & header_sample_frequency_substring() const
Return the currently set (sub)string that is the prefix or suffix for header columns containing the f...
Definition: frequency_table_input_stream.hpp:709
genesis::population::FrequencyTableInputStream::Iterator::operator->
value_type const * operator->() const
Definition: frequency_table_input_stream.hpp:208