|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_FREQUENCY_TABLE_INPUT_STREAM_H_
2 #define GENESIS_POPULATION_FORMAT_FREQUENCY_TABLE_INPUT_STREAM_H_
44 #include <unordered_map>
45 #include <unordered_set>
50 namespace population {
// Per-sample header bookkeeping. The enclosing struct is not visible in this excerpt;
// presumably this is SampleInfo, the mapped type of `sample_infos` — TODO confirm.
// `index` starts at the largest size_t value, which looks like a "not assigned yet" sentinel.
130 size_t index = std::numeric_limits<size_t>::max();
// The four flags below default to false. Presumably each one is set when the header contains
// the corresponding per-sample column (ref count / alt count / frequency / coverage);
// verify against the header parsing code.
131 bool has_ref =
false;
132 bool has_alt =
false;
133 bool has_frq =
false;
134 bool has_cov =
false;
// Header-wide bookkeeping; the enclosing struct is not visible here (presumably HeaderInfo,
// the type of the iterator's `header_info_` member — TODO confirm).
// All flags default to false. Presumably they record whether the chromosome, position,
// reference base, and alternative base columns were found in the header — verify in parse_header_().
143 bool has_chr =
false;
144 bool has_pos =
false;
145 bool has_ref =
false;
146 bool has_alt =
false;
// Per-sample information, keyed by a string (presumably the sample name, cf. get_sample_info_()).
152 std::unordered_map<std::string, SampleInfo> sample_infos;
158 bool is_missing =
false;
171 Iterator() =
default;
174 , input_stream_( std::make_shared<utils::InputStream>( parent_->input_source_ ))
175 , sample_data_( std::make_shared<std::vector<SampleData>>() )
176 , current_variant_( std::make_shared<Variant>() )
210 assert( current_variant_ );
211 return &*current_variant_;
221 assert( current_variant_ );
222 return *current_variant_;
258 return parent_ == it.parent_;
263 return !(*
this == it);
286 void parse_header_();
288 void check_header_fields_(
289 std::unordered_set<std::string>
const& all_samplenames
296 void parse_header_field_(
297 std::string
const& field,
298 std::unordered_set<std::string>& all_samplenames
// Probes for the fixed (non-sample) header columns: chromosome, position, reference base,
// alternative base. Each takes a single header field string and returns an int.
// NOTE(review): the meaning of the int return value is not visible in this excerpt
// (possibly a match indicator/count) — confirm against the definitions.
301 int evaluate_if_field_is_chr_( std::string
const& field );
302 int evaluate_if_field_is_pos_( std::string
const& field );
303 int evaluate_if_field_is_ref_( std::string
const& field );
304 int evaluate_if_field_is_alt_( std::string
const& field );
306 int evaluate_if_field_is_sample_ref_(
307 std::string
const& field,
308 std::unordered_set<std::string>& all_samplenames
310 int evaluate_if_field_is_sample_alt_(
311 std::string
const& field,
312 std::unordered_set<std::string>& all_samplenames
314 int evaluate_if_field_is_sample_frq_(
315 std::string
const& field,
316 std::unordered_set<std::string>& all_samplenames
318 int evaluate_if_field_is_sample_cov_(
319 std::string
const& field,
320 std::unordered_set<std::string>& all_samplenames
// Returns a mutable reference to the SampleInfo for the given sample name.
// NOTE(review): presumably looks up (and possibly creates) the entry in `sample_infos` — confirm.
327 SampleInfo& get_sample_info_( std::string
const& samplename );
// Const query whether the given sample is to be ignored.
// NOTE(review): presumably based on the parent's sample_names_filter_ and
// inverse_sample_names_filter_ settings — confirm against the definition.
328 bool is_ignored_sample_( std::string
const& samplename )
const;
329 static bool parse_if_missing_(
338 bool match_header_field_(
339 std::string
const& field,
340 std::string
const& user_string,
341 std::vector<std::string>
const& predefined_list
344 bool match_header_sample_(
345 std::string
const& field,
346 std::string
const& user_substring,
347 std::vector<std::string>
const& predefined_list,
348 std::string& samplename
351 bool match_header_sample_(
352 std::string
const& field,
353 std::string
const& user_substring,
354 std::vector<std::string>
const& predefined_list1,
355 std::vector<std::string>
const& predefined_list2,
356 std::string& samplename
359 bool match_header_sample_user_partial_(
360 std::string
const& field,
361 std::string
const& substring,
362 std::string& samplename
365 bool match_header_sample_predefined_partial_(
366 std::string
const& field,
367 std::string
const& substring,
368 std::string& samplename
377 void process_sample_data_(
378 SampleInfo
const& sample_info,
379 SampleData
const& sample_data,
// Iterator state. The shared_ptr members are created with std::make_shared in the
// iterator's constructor initializer list (see the listing lines numbered 174-176).
// NOTE(review): holding them via shared_ptr presumably keeps iterator copies cheap and
// pointing at the same underlying state — confirm intent.
394 std::shared_ptr<utils::InputStream> input_stream_;
// Information gathered from the table header.
398 HeaderInfo header_info_;
// Per-sample data for the current line (one SampleData entry per sample, presumably).
411 std::shared_ptr<std::vector<SampleData>> sample_data_;
// The Variant handed out by operator-> and operator*; both assert that it is non-null.
415 std::shared_ptr<Variant> current_variant_;
488 return input_source_;
499 input_source_ = value;
505 return sample_names_filter_;
517 sample_names_filter_ = value;
523 return inverse_sample_names_filter_;
534 inverse_sample_names_filter_ = value;
565 return usr_chr_name_;
587 return usr_pos_name_;
609 return usr_ref_name_;
631 return usr_alt_name_;
649 usr_smp_ref_name_ = str;
661 return usr_smp_ref_name_;
674 usr_smp_alt_name_ = str;
686 return usr_smp_alt_name_;
699 usr_smp_frq_name_ = str;
711 return usr_smp_frq_name_;
725 usr_smp_cov_name_ = str;
737 return usr_smp_cov_name_;
776 return separator_char_;
786 separator_char_ = value;
804 usr_missing_ = value;
841 value > max_int_factor_ ||
844 throw std::runtime_error(
845 "Cannot set int_factor to " +
std::to_string( value ) +
" as this is out of range "
846 "of the int type used for storing base counts."
855 return allowed_rel_freq_error_;
878 allowed_rel_freq_error_ = value;
884 return frequency_is_ref_;
896 frequency_is_ref_ = value;
// Input settings. The input source and the sample name filter (plus its inversion flag)
// are read and assigned by the accessor fragments earlier in this listing.
907 std::shared_ptr<utils::BaseInputSource> input_source_;
908 std::unordered_set<std::string> sample_names_filter_;
// NOTE(review): presumably, when true, the filter lists samples to exclude instead of include — confirm.
909 bool inverse_sample_names_filter_ =
false;
// Shared pointer to a reference genome; how it is used is not visible in this excerpt.
912 std::shared_ptr<::genesis::sequence::ReferenceGenome> ref_genome_;
// Column separator of the table; defaults to a tab character.
913 char separator_char_ =
'\t';
// Built-in tokens, plus one user-provided token (`usr_missing_`).
// NOTE(review): presumably these mark missing data in a table cell (cf. parse_if_missing_()) — confirm.
918 std::vector<std::string> missing_ = {
".",
"nan",
"na" };
919 std::string usr_missing_;
// Upper bound for the int factor: 9007199254740992 == 2^53, i.e. the limit up to which an
// IEEE-754 double represents every integer exactly. The int_factor range check in this file
// throws std::runtime_error for values above this bound.
930 static constexpr
double max_int_factor_ = 9007199254740992.0;
// Default value used to initialize int_factor_ (one million).
937 static constexpr
double default_int_factor_ = 1000000.0;
942 "Numeric type for SampleCounts does not fit for FrequencyTableInputStream::max_int_factor_"
// Currently configured int factor, starting at default_int_factor_.
// NOTE(review): presumably the multiplier used to turn frequencies into integer base
// counts (the range-check error message refers to "the int type used for storing base counts") — confirm.
949 double int_factor_ = default_int_factor_;
// Allowed relative frequency error, 0.001 by default.
// NOTE(review): presumably a tolerance when cross-checking a given frequency against counts — confirm.
953 double allowed_rel_freq_error_ = 0.001;
// Defaults to true.
// NOTE(review): presumably selects whether a frequency column refers to the reference (true)
// or the alternative (false) allele — confirm.
957 bool frequency_is_ref_ =
true;
// Predefined header names, one list per column type: chromosome, position, reference base,
// alternative base, counts, frequency, and coverage. The match_header_* helpers take such
// lists as their `predefined_list` arguments.
// All entries are lower case without separators.
// NOTE(review): presumably header fields are normalized (case, separators) before comparison — confirm.
961 std::vector<std::string> chr_names_ = {
"chromosome",
"chrom",
"chr",
"contig" };
962 std::vector<std::string> pos_names_ = {
"position",
"pos" };
963 std::vector<std::string> ref_names_ = {
"reference",
"referencebase",
"ref",
"refbase" };
964 std::vector<std::string> alt_names_ = {
"alternative",
"alternativebase",
"alt",
"altbase" };
965 std::vector<std::string> cnt_names_ = {
"counts",
"count",
"cnt",
"ct" };
966 std::vector<std::string> frq_names_ = {
"frequency",
"freq",
"maf",
"af",
"allelefrequency" };
967 std::vector<std::string> cov_names_ = {
"coverage",
"cov",
"readdepth",
"depth",
"ad" };
// User-provided header names, empty by default. The first four are for the fixed columns
// (chromosome, position, reference base, alternative base); the `usr_smp_*` ones are for the
// per-sample columns (ref, alt, frequency, coverage). They are assigned and returned by the
// accessor fragments earlier in this listing.
// NOTE(review): presumably these are checked in addition to (or instead of) the predefined
// name lists when matching header fields — confirm.
971 std::string usr_chr_name_;
972 std::string usr_pos_name_;
973 std::string usr_ref_name_;
974 std::string usr_alt_name_;
975 std::string usr_smp_ref_name_;
976 std::string usr_smp_alt_name_;
977 std::string usr_smp_frq_name_;
978 std::string usr_smp_cov_name_;
985 #endif // include guard
std::string to_string(GenomeLocus const &locus)
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
size_t size_type
Public alias for the size type that the class uses to store its counts.