A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
labels.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <string>
37 #include <utility>
38 #include <unordered_set>
39 
40 namespace genesis {
41 namespace sequence {
42 
43 // =================================================================================================
44 // Forward Declarations
45 // =================================================================================================
46 
// Declared by name only: the functions in this header take these types by reference
// or return a pointer to them, so their full definitions are not needed here.
47 class Sequence;
48 class SequenceSet;
49 
50 // =================================================================================================
51 // General
52 // =================================================================================================
53 
/**
 * @brief Return a pointer to a Sequence with a specific label, or nullptr iff not found.
 *
 * @param set    SequenceSet to search.
 * @param label  Label to look for.
 * @return Pointer to a const Sequence with that label, or nullptr if there is none.
 */
57 Sequence const* find_sequence( SequenceSet const& set, std::string const& label );
58 
/**
 * @brief Return a set of all labels of the SequenceSet.
 *
 * The result is an unordered set of strings, so each distinct label appears once.
 */
62 std::unordered_set<std::string> labels( SequenceSet const& set );
63 
/**
 * @brief Guess the abundance of a Sequence, using its label.
 *
 * @param sequence  Sequence whose label is inspected.
 * @return A (string, size_t) pair.
 *         NOTE(review): presumably the label without the abundance annotation plus the
 *         abundance count; confirm against the definition in labels.cpp.
 */
80 std::pair<std::string, size_t> guess_sequence_abundance( Sequence const& sequence );
81 
/**
 * @brief Overload of guess_sequence_abundance() that takes the label string directly.
 *
 * @param label  Label text to inspect.
 * @return A (string, size_t) pair.
 *         NOTE(review): presumably the same result convention as the Sequence overload;
 *         confirm against the definition in labels.cpp.
 */
88 std::pair<std::string, size_t> guess_sequence_abundance( std::string const& label );
89 
90 // =================================================================================================
91 // Uniqueness
92 // =================================================================================================
93 
/**
 * @brief Return true iff all labels of the Sequences in the SequenceSet are unique.
 *
 * @param set             SequenceSet to check.
 * @param case_sensitive  Defaults to true.
 *                        NOTE(review): presumably, when false, labels that differ only in
 *                        letter case count as duplicates; confirm against labels.cpp.
 */
100 bool has_unique_labels( SequenceSet const& set, bool case_sensitive = true );
101 
/**
 * @brief Relabel the Sequence using the hash digest of its sites.
 *
 * @param seq            Sequence to relabel; taken by non-const reference and modified.
 * @param hash_function  Which of the implemented hashing functions (utils::HashingFunctions) to use.
 */
107 void relabel_with_hash( Sequence& seq, utils::HashingFunctions hash_function );
108 
/**
 * @brief Overload of relabel_with_hash() that takes a whole SequenceSet.
 *
 * NOTE(review): presumably relabels every Sequence in the set with the hash digest of its
 * sites; how Sequences with identical sites are handled is not visible here. Confirm
 * against labels.cpp.
 *
 * @param set            SequenceSet to relabel; taken by non-const reference.
 * @param hash_function  Which of the implemented hashing functions (utils::HashingFunctions) to use.
 */
118 void relabel_with_hash( SequenceSet& set, utils::HashingFunctions hash_function );
119 
120 // =================================================================================================
121 // Validity
122 // =================================================================================================
123 
/**
 * @brief Check whether a given string is a valid label for a Sequence.
 *
 * The exact validity criteria are defined in labels.cpp and are not visible in this header.
 */
141 bool is_valid_label( std::string const& label );
142 
/**
 * @brief Check whether a Sequence has a valid label.
 *
 * NOTE(review): presumably applies the same criteria as is_valid_label() to the
 * Sequence's label; confirm against labels.cpp.
 */
151 bool has_valid_label( Sequence const& seq );
152 
/**
 * @brief Check whether all Sequences in a SequenceSet have valid labels.
 */
161 bool has_valid_labels( SequenceSet const& set );
162 
/**
 * @brief Sanitize a label by replacing all invalid characters with underscores.
 *
 * @param label  Input label; taken by const reference and left untouched.
 * @return The sanitized label as a new string.
 */
169 std::string sanitize_label( std::string const& label );
170 
/**
 * @brief Overload of sanitize_label() that takes a Sequence by non-const reference.
 *
 * NOTE(review): presumably replaces the invalid characters of the Sequence's label with
 * underscores in place; confirm against labels.cpp.
 */
177 void sanitize_label( Sequence& seq );
178 
/**
 * @brief Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid
 * characters with underscores.
 *
 * @param set  SequenceSet whose labels are sanitized; taken by non-const reference.
 */
186 void sanitize_labels( SequenceSet& set );
187 
188 // =================================================================================================
189 // Modifiers
190 // =================================================================================================
191 
/**
 * @brief Remove all those Sequences from a SequenceSet whose labels are in the given list.
 *
 * @param set     SequenceSet to filter; taken by non-const reference and modified in place.
 * @param labels  Labels to match against.
 * @param invert  Defaults to false.
 *                NOTE(review): presumably inverts the selection, so that Sequences whose
 *                labels are NOT in the list get removed; confirm against labels.cpp.
 */
198 void filter_by_label_list(
199  SequenceSet& set,
200  std::unordered_set<std::string> const& labels,
201  bool invert = false
202 );
203 
204 } // namespace sequence
205 } // namespace genesis
206 
207 #endif // include guard
std::pair< std::string, size_t > guess_sequence_abundance(Sequence const &sequence)
Guess the abundance of a Sequence, using its label.
Definition: labels.cpp:70
void filter_by_label_list(SequenceSet &set, std::unordered_set< std::string > const &labels, bool invert)
Remove all those Sequences from a SequenceSet whose labels are in the given list. ...
Definition: labels.cpp:240
bool has_unique_labels(SequenceSet const &set, bool case_sensitive)
Return true iff all labels of the Sequences in the SequenceSet are unique.
Definition: labels.cpp:144
bool has_valid_labels(SequenceSet const &set)
Check whether all Sequences in a SequenceSet have valid labels.
Definition: labels.cpp:198
void sanitize_labels(SequenceSet &set)
Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid characters with underscores.
Definition: labels.cpp:229
bool is_valid_label(std::string const &label)
Check whether a given string is a valid label for a Sequence.
Definition: labels.cpp:182
std::string sanitize_label(std::string const &label)
Sanitize a label by replacing all invalid characters with underscores.
Definition: labels.cpp:208
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:62
void relabel_with_hash(Sequence &seq, utils::HashingFunctions hash_function)
Relabel the Sequence using the hash digest of its sites.
Definition: labels.cpp:165
HashingFunctions
List of the currently implemented hashing functions.
Definition: hashing.hpp:53
bool has_valid_label(Sequence const &seq)
Check whether a Sequence has a valid label.
Definition: labels.cpp:193
Sequence const * find_sequence(SequenceSet const &set, std::string const &label)
Return a pointer to a Sequence with a specific label, or nullptr iff not found.
Definition: labels.cpp:52