|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
57 if (s.label() == label) {
66 std::unordered_set<std::string> result;
67 for(
auto const& seq : set ) {
68 result.insert( seq.label() );
79 std::string res_name = label;
83 auto is_digits_ = []( std::string
const& s )
85 return s.find_first_not_of(
"0123456789" ) == std::string::npos;
92 if( la.attributes.count(
"size" ) > 0 && is_digits_( la.attributes.at(
"size" ))) {
93 res_abun = std::stoull( la.attributes.at(
"size" ));
101 auto const upos = label.find_last_of(
"_" );
102 if( upos != std::string::npos && upos + 1 < label.size() && ::isdigit( label[ upos + 1 ]) ) {
105 auto const sub = label.substr( upos + 1 );
106 if( is_digits_( sub ) ) {
107 res_name = label.substr( 0, upos );
108 res_abun = std::stoull( sub );
112 return { res_name, res_abun };
166 assert( attribs.size() > 0 );
167 result.
label = attribs.front();
170 for(
size_t i = 1; i < attribs.size(); ++i ) {
172 if( ap.size() != 2 ) {
173 throw std::runtime_error(
"Invalid Sequence label for extracting label attributes." );
187 std::unordered_set< std::string > label_set;
190 for(
auto const& seq : set ) {
191 if( case_sensitive ) {
197 if( label_set.count( label ) > 0 ) {
200 label_set.insert( label );
214 for(
auto& seq : set ) {
225 std::string invalid_chars =
":,();[]'";
226 for(
auto c : label ) {
227 if( ! isgraph(c) || invalid_chars.find( c ) != std::string::npos ) {
241 for(
auto const& seq : set ) {
252 result.reserve( label.size() );
254 std::string
const invalid_chars =
":,();[]'";
255 for(
auto c : label ) {
256 if( ! isgraph(c) || invalid_chars.find( c ) != std::string::npos ) {
272 for(
auto& seq : set ) {
283 std::unordered_set<std::string>
const&
labels,
286 auto new_last = std::remove_if(
290 return ( !invert && labels.count( seq.label() ) > 0 ) ||
291 ( invert && labels.count( seq.label() ) == 0 );
bool is_valid_label(std::string const &label)
Check whether a given string is a valid label for a Sequence.
Sequence const * find_sequence(SequenceSet const &set, std::string const &label)
Return a pointer to a Sequence with a specific label, or nullptr iff not found.
void filter_by_label_list(SequenceSet &set, std::unordered_set< std::string > const &labels, bool invert)
Remove all those Sequences from a SequenceSet whose labels are in the given list.
HashingFunctions
List of the currently implemented hashing functions.
std::string hash_hex(std::shared_ptr< BaseInputSource > source, HashingFunctions hash_fct)
Calculate the hash of an input source, using a given hashing function, and return its hex representation.
bool has_valid_labels(SequenceSet const &set)
Check whether all Sequences in a SequenceSet have valid labels.
std::shared_ptr< BaseInputSource > from_string(std::string const &input_string)
Obtain an input source for reading from a string.
std::vector< std::string > split(std::string const &str, char delimiter, const bool trim_empty)
Split a string into parts, given a delimiter char.
Provides some commonly used string utility functions.
std::string sanitize_label(std::string const &label)
Sanitize a label by replacing all invalid characters with underscores.
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
void remove(size_t index)
Remove the Sequence at a given index from the SequenceSet.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Store a set of Sequences.
void relabel_with_hash(Sequence &seq, utils::HashingFunctions hash_function)
Relabel the Sequence using the hash digest of its sites.
bool has_unique_labels(SequenceSet const &set, bool case_sensitive)
Return true iff all labels of the Sequences in the SequenceSet are unique.
void sanitize_labels(SequenceSet &set)
Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid characters with underscores.
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
std::pair< std::string, size_t > guess_sequence_abundance(Sequence const &sequence)
Guess the abundance of a Sequence, using its label.
std::unordered_map< std::string, std::string > attributes
LabelAttributes label_attributes(Sequence const &sequence)
Get the attributes list (semicolon-separated) from a Sequence.
bool has_valid_label(Sequence const &seq)
Check whether a Sequence has a valid label.