A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
labels.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <string>
35 #include <unordered_set>
36 
37 namespace genesis {
38 namespace sequence {
39 
40 // =================================================================================================
41 // Forward Declarations
42 // =================================================================================================
43 
44 class Sequence;
45 class SequenceSet;
46 
47 // =================================================================================================
48 // General
49 // =================================================================================================
50 
54 Sequence const* find_sequence( SequenceSet const& set, std::string const& label );
55 
59 std::unordered_set<std::string> labels( SequenceSet const& set );
60 
61 // =================================================================================================
62 // Uniqueness
63 // =================================================================================================
64 
71 bool has_unique_labels( SequenceSet const& set, bool case_sensitive = true );
72 
76 void relabel_sha1( Sequence& seq );
77 
86 void relabel_sha1( SequenceSet& set );
87 
88 // =================================================================================================
89 // Validity
90 // =================================================================================================
91 
109 bool is_valid_label( std::string const& label );
110 
119 bool has_valid_label( Sequence const& seq );
120 
129 bool has_valid_labels( SequenceSet const& set );
130 
137 std::string sanitize_label( std::string const& label );
138 
145 void sanitize_label( Sequence& seq );
146 
154 void sanitize_labels( SequenceSet& set );
155 
156 // =================================================================================================
157 // Modifiers
158 // =================================================================================================
159 
166 void filter_by_label_list(
167  SequenceSet& set,
168  std::unordered_set<std::string> const& labels,
169  bool invert = false
170 );
171 
172 } // namespace sequence
173 } // namespace genesis
174 
175 #endif // include guard
void relabel_sha1(Sequence &seq)
Relabel the Sequence using the SHA1 hash digest of its sites.
Definition: labels.cpp:92
void filter_by_label_list(SequenceSet &set, std::unordered_set< std::string > const &labels, bool invert)
Remove all those Sequences from a SequenceSet whose labels are in the given list. ...
Definition: labels.cpp:167
bool has_unique_labels(SequenceSet const &set, bool case_sensitive)
Return true iff all labels of the Sequences in the SequenceSet are unique.
Definition: labels.cpp:71
bool has_valid_labels(SequenceSet const &set)
Check whether all Sequences in a SequenceSet have valid labels.
Definition: labels.cpp:125
void sanitize_labels(SequenceSet &set)
Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid characters with underscores.
Definition: labels.cpp:156
bool is_valid_label(std::string const &label)
Check whether a given string is a valid label for a Sequence.
Definition: labels.cpp:109
std::string sanitize_label(std::string const &label)
Sanitize a label by replacing all invalid characters with underscores.
Definition: labels.cpp:135
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:58
bool has_valid_label(Sequence const &seq)
Check whether a Sequence has a valid label.
Definition: labels.cpp:120
Sequence const * find_sequence(SequenceSet const &set, std::string const &label)
Return a pointer to a Sequence with a specific label, or nullptr iff not found.
Definition: labels.cpp:48