|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file. 1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_FUNCTIONS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_FUNCTIONS_H_
65 std::string
const& chars
77 utils::CharLookup<bool>
const& chars
103 SequenceSet
const& set,
117 bool validate_chars( SequenceSet
const& set, std::string
const& chars );
146 void remove_sites( Sequence& seq, utils::Bitvector sites );
158 void remove_sites( SequenceSet& set, utils::Bitvector sites );
200 void replace_characters( Sequence& seq, std::string
const& search,
char replacement );
210 void replace_characters( SequenceSet& set, std::string
const& search,
char replacement );
280 std::string
const& counter_prefix =
"_"
354 std::ostream&
operator << ( std::ostream& out, Sequence
const& seq );
363 std::ostream&
operator << ( std::ostream& out, SequenceSet
const& set );
368 #endif // include guard
void normalize_amino_acid_codes(Sequence &sequence, bool accept_degenerated)
Call normalize_amino_acid_code() for each site of the Sequence.
void remove_characters(Sequence &seq, std::string const &search)
Remove all of the characters in search from the sites of the Sequence.
void normalize_nucleic_acid_codes(Sequence &sequence, bool accept_degenerated)
Call normalize_nucleic_acid_code() for each site of the Sequence.
void replace_t_with_u(Sequence &seq)
Replace all occurrences of T by U in the sites of the Sequence.
void replace_u_with_t(Sequence &seq)
Replace all occurrences of U by T in the sites of the Sequence.
void remove_sites(Sequence &seq, utils::Bitvector sites)
Remove all sites from a Sequence where the given Bitvector is true, and keep all others.
@ kAppendToLabel
The counts are appended to the sequence label, separated by the counter_prefix.
std::ostream & operator<<(std::ostream &out, Sequence const &seq)
Print a Sequence to an ostream in the form "label: sites".
void filter_max_sequence_length(SequenceSet &set, size_t max_length)
Remove all Sequences from the SequenceSet whose length is above the given max_length.
MergeDuplicateSequencesCountPolicy
Provide options for changing how merge_duplicate_sequences() handles the counts of merged Sequences.
void filter_min_max_sequence_length(SequenceSet &set, size_t min_length, size_t max_length)
Remove all Sequences from the SequenceSet whose length is not in between the min_length and max_length...
utils::Bitvector gap_sites(Sequence const &seq, std::string const &gap_chars)
Return a Bitvector that is true where the Sequence has a gap and false where not.
bool validate_chars(SequenceSet const &set, std::string const &chars)
Returns true iff all Sequences only consist of the given chars.
void filter_min_sequence_length(SequenceSet &set, size_t min_length)
Remove all Sequences from the SequenceSet whose length is below the given min_length.
std::string nucleic_acid_codes_undetermined()
Return all undetermined nucleic acid codes. Those are the characters 'N', 'O', 'X', '.', '-' and '?'.
void merge_duplicate_sequences(SequenceSet &set, MergeDuplicateSequencesCountPolicy count_policy, std::string const &counter_prefix)
Merge all Sequences in a SequenceSet that have identical sites.
size_t longest_sequence_length(SequenceSet const &set)
Return the length of the longest Sequence in the SequenceSet.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
void remove_gap_sites(SequenceSet &set, std::string const &gap_chars)
Remove all sites that only contain gap characters from the SequenceSet.
utils::Bitvector find_sites(Sequence const &seq, std::string const &chars)
Find sites by character and mark them in a Bitvector.
@ kDiscard
The counts are discarded.
void replace_characters(Sequence &seq, std::string const &search, char replacement)
Replace all occurrences of the chars in search by the replacement char, for all sites in the given Sequence.
bool is_alignment(SequenceSet const &set)
Return true iff all Sequences in the SequenceSet have the same length.
size_t total_length(SequenceSet const &set)
Return the total length (sum) of all Sequences in the SequenceSet.
void remove_all_gaps(Sequence &seq, std::string const &gap_chars)
Remove all gap characters from the sites of the Sequence.