A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
labels.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <string>
35 #include <unordered_set>
36 
37 namespace genesis {
38 namespace sequence {
39 
40 // =================================================================================================
41 // Forward Declarations
42 // =================================================================================================
43 
44 class Sequence;
45 class SequenceSet;
46 
47 // =================================================================================================
48 // General
49 // =================================================================================================
50 
54 Sequence const* find_sequence( SequenceSet const& set, std::string const& label );
55 
59 std::unordered_set<std::string> labels( SequenceSet const& set );
60 
73 size_t guess_sequence_abundance( Sequence const& sequence );
74 
81 size_t guess_sequence_abundance( std::string const& label );
82 
83 // =================================================================================================
84 // Uniqueness
85 // =================================================================================================
86 
93 bool has_unique_labels( SequenceSet const& set, bool case_sensitive = true );
94 
98 void relabel_sha1( Sequence& seq );
99 
108 void relabel_sha1( SequenceSet& set );
109 
113 void relabel_sha256( Sequence& seq );
114 
123 void relabel_sha256( SequenceSet& set );
124 
128 void relabel_md5( Sequence& seq );
129 
138 void relabel_md5( SequenceSet& set );
139 
140 // =================================================================================================
141 // Validity
142 // =================================================================================================
143 
161 bool is_valid_label( std::string const& label );
162 
171 bool has_valid_label( Sequence const& seq );
172 
181 bool has_valid_labels( SequenceSet const& set );
182 
189 std::string sanitize_label( std::string const& label );
190 
197 void sanitize_label( Sequence& seq );
198 
206 void sanitize_labels( SequenceSet& set );
207 
208 // =================================================================================================
209 // Modifiers
210 // =================================================================================================
211 
218 void filter_by_label_list(
219  SequenceSet& set,
220  std::unordered_set<std::string> const& labels,
221  bool invert = false
222 );
223 
224 } // namespace sequence
225 } // namespace genesis
226 
227 #endif // include guard
void relabel_sha256(Sequence &seq)
Relabel the Sequence using the SHA256 hash digest of its sites.
Definition: labels.cpp:167
void relabel_sha1(Sequence &seq)
Relabel the Sequence using the SHA1 hash digest of its sites.
Definition: labels.cpp:154
size_t guess_sequence_abundance(Sequence const &sequence)
Guess the abundance of a Sequence, using its label.
Definition: labels.cpp:70
void filter_by_label_list(SequenceSet &set, std::unordered_set< std::string > const &labels, bool invert)
Remove all those Sequences from a SequenceSet whose labels are in the given list. ...
Definition: labels.cpp:255
bool has_unique_labels(SequenceSet const &set, bool case_sensitive)
Return true iff all labels of the Sequences in the SequenceSet are unique.
Definition: labels.cpp:133
void relabel_md5(Sequence &seq)
Relabel the Sequence using the MD5 hash digest of its sites.
Definition: labels.cpp:180
bool has_valid_labels(SequenceSet const &set)
Check whether all Sequences in a SequenceSet have valid labels.
Definition: labels.cpp:213
void sanitize_labels(SequenceSet &set)
Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid characters with underscores.
Definition: labels.cpp:244
bool is_valid_label(std::string const &label)
Check whether a given string is a valid label for a Sequence.
Definition: labels.cpp:197
std::string sanitize_label(std::string const &label)
Sanitize a label by replacing all invalid characters with underscores.
Definition: labels.cpp:223
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:62
bool has_valid_label(Sequence const &seq)
Check whether a Sequence has a valid label.
Definition: labels.cpp:208
Sequence const * find_sequence(SequenceSet const &set, std::string const &label)
Return a pointer to a Sequence with a specific label, or nullptr iff not found.
Definition: labels.cpp:52