A library for working with phylogenetic and population genetic data.
v0.27.0
labels.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_LABELS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <string>
37 #include <utility>
38 #include <unordered_map>
39 #include <unordered_set>
40 
41 namespace genesis {
42 namespace sequence {
43 
44 // =================================================================================================
45 // Forward Declarations
46 // =================================================================================================
47 
48 class Sequence;
49 class SequenceSet;
50 
51 // =================================================================================================
52 // Helper Structs
53 // =================================================================================================
54 
56 {
57  std::string label;
58  std::unordered_map<std::string, std::string> attributes;
59 };
60 
61 // =================================================================================================
62 // General
63 // =================================================================================================
64 
/**
 * @brief Return a pointer to a Sequence with a specific label, or nullptr iff not found.
 *
 * @param set   SequenceSet to search.
 * @param label Label to look for.
 * @return Non-owning pointer to the matching Sequence, or nullptr if there is none.
 */
68 Sequence const* find_sequence( SequenceSet const& set, std::string const& label );
69 
/**
 * @brief Return a set of all labels of the SequenceSet.
 *
 * The result is an unordered set, so it holds each distinct label once and
 * carries no ordering information.
 */
73 std::unordered_set<std::string> labels( SequenceSet const& set );
74 
/**
 * @brief Guess the abundance of a Sequence, using its label.
 *
 * @param sequence Sequence whose label is inspected.
 * @return A (string, count) pair.
 *         NOTE(review): presumably the label without its abundance annotation
 *         and the guessed abundance - confirm against labels.cpp.
 */
92 std::pair<std::string, size_t> guess_sequence_abundance( Sequence const& sequence );
93 
/**
 * @brief Overload of guess_sequence_abundance() that takes the label string directly.
 *
 * NOTE(review): presumably behaves like the Sequence overload applied to
 * @p label - confirm against labels.cpp.
 */
100 std::pair<std::string, size_t> guess_sequence_abundance( std::string const& label );
101 
/**
 * @brief Get the attributes list (semicolon-separated) from a Sequence.
 *
 * @param sequence Sequence whose label is parsed.
 * @return A LabelAttributes value, holding a `label` string and a
 *         string-to-string `attributes` map.
 */
121 LabelAttributes label_attributes( Sequence const& sequence );
122 
/**
 * @brief Overload of label_attributes() that takes the label string directly.
 *
 * NOTE(review): presumably parses @p label the same way as the Sequence
 * overload - confirm against labels.cpp.
 */
129 LabelAttributes label_attributes( std::string const& label );
130 
131 // =================================================================================================
132 // Uniqueness
133 // =================================================================================================
134 
/**
 * @brief Return true iff all labels of the Sequences in the SequenceSet are unique.
 *
 * @param set            SequenceSet to check.
 * @param case_sensitive Defaults to true.
 *        NOTE(review): presumably, when false, labels that differ only in
 *        letter case count as duplicates - confirm against labels.cpp.
 */
141 bool has_unique_labels( SequenceSet const& set, bool case_sensitive = true );
142 
/**
 * @brief Relabel the Sequence using the hash digest of its sites.
 *
 * @param seq           Sequence to relabel; taken by non-const reference.
 * @param hash_function Which of the implemented utils::HashingFunctions to use.
 */
148 void relabel_with_hash( Sequence& seq, utils::HashingFunctions hash_function );
149 
/**
 * @brief Overload of relabel_with_hash() that takes a whole SequenceSet.
 *
 * NOTE(review): presumably relabels every Sequence in @p set with the hash
 * digest of its sites; how identical sequences (and thus identical labels)
 * are handled is not visible here - confirm against labels.cpp.
 */
159 void relabel_with_hash( SequenceSet& set, utils::HashingFunctions hash_function );
160 
161 // =================================================================================================
162 // Validity
163 // =================================================================================================
164 
/**
 * @brief Check whether a given string is a valid label for a Sequence.
 *
 * NOTE(review): the exact set of characters considered invalid is defined in
 * labels.cpp and is not visible in this header.
 */
182 bool is_valid_label( std::string const& label );
183 
/**
 * @brief Check whether a Sequence has a valid label.
 *
 * NOTE(review): presumably applies is_valid_label() to the label of @p seq -
 * confirm against labels.cpp.
 */
192 bool has_valid_label( Sequence const& seq );
193 
/**
 * @brief Check whether all Sequences in a SequenceSet have valid labels.
 */
202 bool has_valid_labels( SequenceSet const& set );
203 
/**
 * @brief Sanitize a label by replacing all invalid characters with underscores.
 *
 * @param label Label to sanitize; taken by const reference and left untouched.
 * @return The sanitized copy of the label.
 */
210 std::string sanitize_label( std::string const& label );
211 
/**
 * @brief Overload of sanitize_label() that takes a Sequence by non-const reference.
 *
 * NOTE(review): presumably replaces the label of @p seq in place with its
 * sanitized form - confirm against labels.cpp.
 */
218 void sanitize_label( Sequence& seq );
219 
/**
 * @brief Sanitize the labels of all Sequences in the SequenceSet by replacing
 * all invalid characters.
 *
 * NOTE(review): the replacement character is presumably an underscore, as in
 * sanitize_label() for strings - confirm against labels.cpp.
 */
227 void sanitize_labels( SequenceSet& set );
228 
229 // =================================================================================================
230 // Modifiers
231 // =================================================================================================
232 
240  SequenceSet& set,
241  std::unordered_set<std::string> const& labels,
242  bool invert = false
243 );
244 
245 } // namespace sequence
246 } // namespace genesis
247 
248 #endif // include guard
genesis::sequence::is_valid_label
bool is_valid_label(std::string const &label)
Check whether a given string is a valid label for a Sequence.
Definition: labels.cpp:223
genesis::sequence::find_sequence
Sequence const * find_sequence(SequenceSet const &set, std::string const &label)
Return a pointer to a Sequence with a specific label, or nullptr iff not found.
Definition: labels.cpp:54
genesis::sequence::filter_by_label_list
void filter_by_label_list(SequenceSet &set, std::unordered_set< std::string > const &labels, bool invert)
Remove all those Sequences from a SequenceSet whose labels are in the given list.
Definition: labels.cpp:281
genesis::utils::HashingFunctions
HashingFunctions
List of the currently implemented hashing functions.
Definition: utils/tools/hash/functions.hpp:53
genesis::sequence::Sequence
Definition: sequence/sequence.hpp:40
genesis::sequence::LabelAttributes
Definition: labels.hpp:55
genesis::sequence::has_valid_labels
bool has_valid_labels(SequenceSet const &set)
Check whether all Sequences in a SequenceSet have valid labels.
Definition: labels.cpp:239
genesis::sequence::sanitize_label
std::string sanitize_label(std::string const &label)
Sanitize a label by replacing all invalid characters with underscores.
Definition: labels.cpp:249
genesis::sequence::labels
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:64
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::SequenceSet
Store a set of Sequences.
Definition: sequence_set.hpp:59
genesis::sequence::relabel_with_hash
void relabel_with_hash(Sequence &seq, utils::HashingFunctions hash_function)
Relabel the Sequence using the hash digest of its sites.
Definition: labels.cpp:206
genesis::sequence::LabelAttributes::label
std::string label
Definition: labels.hpp:57
genesis::sequence::has_unique_labels
bool has_unique_labels(SequenceSet const &set, bool case_sensitive)
Return true iff all labels of the Sequences in the SequenceSet are unique.
Definition: labels.cpp:185
genesis::sequence::sanitize_labels
void sanitize_labels(SequenceSet &set)
Sanitize the labels of all Sequences in the SequenceSet by replacing all invalid characters with underscores.
Definition: labels.cpp:270
functions.hpp
genesis::sequence::guess_sequence_abundance
std::pair< std::string, size_t > guess_sequence_abundance(Sequence const &sequence)
Guess the abundance of a Sequence, using its label.
Definition: labels.cpp:72
genesis::sequence::LabelAttributes::attributes
std::unordered_map< std::string, std::string > attributes
Definition: labels.hpp:58
genesis::sequence::label_attributes
LabelAttributes label_attributes(Sequence const &sequence)
Get the attributes list (semicolon-separated) from a Sequence.
Definition: labels.cpp:155
genesis::sequence::has_valid_label
bool has_valid_label(Sequence const &seq)
Check whether a Sequence has a valid label.
Definition: labels.cpp:234