A library for working with phylogenetic and population genetic data.
v0.27.0
sequence/functions/functions.cpp File Reference
#include "genesis/sequence/functions/functions.hpp"
#include "genesis/sequence/sequence_set.hpp"
#include "genesis/sequence/sequence.hpp"
#include "genesis/sequence/printers/simple.hpp"
#include "genesis/utils/core/logging.hpp"
#include "genesis/utils/text/string.hpp"
#include "genesis/utils/text/style.hpp"
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <numeric>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>

Go to the source code of this file.

Namespaces

 genesis
 Container namespace for all symbols of genesis in order to keep them separate when used as a library.
 
 genesis::sequence
 

Functions

void filter_max_sequence_length (SequenceSet &set, size_t max_length)
 Remove all Sequences from the SequenceSet whose length is above the given max_length. More...
 
void filter_min_max_sequence_length (SequenceSet &set, size_t min_length, size_t max_length)
 Remove all Sequences from the SequenceSet whose length is not in between the min_length and max_length. More...
 
void filter_min_sequence_length (SequenceSet &set, size_t min_length)
 Remove all Sequences from the SequenceSet whose length is below the given min_length. More...
 
utils::Bitvector find_sites (Sequence const &seq, std::string const &chars)
 Find sites by character and mark them in a Bitvector. More...
 
utils::Bitvector find_sites (Sequence const &seq, utils::CharLookup< bool > const &chars)
 Find sites by character and mark them in a Bitvector. More...
 
utils::Bitvector gap_sites (Sequence const &seq, std::string const &gap_chars=nucleic_acid_codes_undetermined())
 Return a Bitvector that is true where the Sequence has a gap and false where not. More...
 
utils::Bitvector gap_sites (SequenceSet const &set, std::string const &gap_chars=nucleic_acid_codes_undetermined())
 Return a Bitvector that is true where all Sequences in the SequenceSet have a gap and false where not, that is, where at least one Sequence is not a gap. More...
 
bool is_alignment (SequenceSet const &set)
 Return true iff all Sequences in the SequenceSet have the same length. More...
 
size_t longest_sequence_length (SequenceSet const &set)
 Return the length of the longest Sequence in the SequenceSet. More...
 
void merge_duplicate_sequences (SequenceSet &set, MergeDuplicateSequencesCountPolicy count_policy=MergeDuplicateSequencesCountPolicy::kDiscard, std::string const &counter_prefix="_")
 Merge all Sequences in a SequenceSet that have identical sites. More...
 
void normalize_amino_acid_codes (Sequence &sequence, bool accept_degenerated=true)
 Call normalize_amino_acid_code() for each site of the Sequence. More...
 
void normalize_amino_acid_codes (SequenceSet &sequence_set, bool accept_degenerated=true)
 Call normalize_amino_acid_code() for each site of all Sequences in the SequenceSet. More...
 
void normalize_nucleic_acid_codes (Sequence &sequence, bool accept_degenerated=true)
 Call normalize_nucleic_acid_code() for each site of the Sequence. More...
 
void normalize_nucleic_acid_codes (SequenceSet &sequence_set, bool accept_degenerated=true)
 Call normalize_nucleic_acid_code() for each site of all Sequences in the SequenceSet. More...
 
std::ostream & operator<< (std::ostream &out, Sequence const &seq)
 Print a Sequence to an ostream in the form "label: sites". More...
 
std::ostream & operator<< (std::ostream &out, SequenceSet const &set)
 Print a SequenceSet to an ostream in the form "label: sites". More...
 
void remove_all_gaps (Sequence &seq, std::string const &gap_chars=nucleic_acid_codes_undetermined())
 Remove all gap characters from the sites of the Sequence. More...
 
void remove_all_gaps (SequenceSet &set, std::string const &gap_chars=nucleic_acid_codes_undetermined())
 Remove all gap characters from the sites of the Sequences in the SequenceSet. More...
 
void remove_characters (Sequence &seq, std::string const &search)
 Remove all of the characters in search from the sites of the Sequence. More...
 
void remove_characters (SequenceSet &set, std::string const &search)
 Remove all of the characters in search from the sites of the Sequences in the SequenceSet. More...
 
void remove_gap_sites (SequenceSet &set, std::string const &gap_chars=nucleic_acid_codes_undetermined())
 Remove all sites that only contain gap characters from the SequenceSet. More...
 
void remove_sites (Sequence &seq, utils::Bitvector sites)
 Remove all sites from a Sequence where the given Bitvector is true, and keep all others. More...
 
void remove_sites (SequenceSet &set, utils::Bitvector sites)
 Remove all sites from all Sequences in a SequenceSet where the given Bitvector is true, and keep all others. More...
 
void replace_characters (Sequence &seq, std::string const &search, char replacement)
 Replace all occurrences of the chars in search by the replacement char, for all sites in the given Sequence. More...
 
void replace_characters (SequenceSet &set, std::string const &search, char replacement)
 Replace all occurrences of the chars in search by the replacement char, for all sites in the Sequences in the given SequenceSet. More...
 
void replace_t_with_u (Sequence &seq)
 Replace all occurrences of T by U in the sites of the Sequence. More...
 
void replace_t_with_u (SequenceSet &set)
 Replace all occurrences of T by U in the sites of all Sequences in the SequenceSet. More...
 
void replace_u_with_t (Sequence &seq)
 Replace all occurrences of U by T in the sites of the Sequence. More...
 
void replace_u_with_t (SequenceSet &set)
 Replace all occurrences of U by T in the sites of all Sequences in the SequenceSet. More...
 
size_t total_length (SequenceSet const &set)
 Return the total length (sum) of all Sequences in the SequenceSet. More...
 
bool validate_chars (SequenceSet const &set, std::string const &chars)
 Return true iff all Sequences only consist of the given chars. More...