A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sequence/functions/functions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_FUNCTIONS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_FUNCTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <iosfwd>
38 #include <map>
39 #include <string>
40 
41 namespace genesis {
42 namespace sequence {
43 
44 // =================================================================================================
45 // Forward Declarations
46 // =================================================================================================
47 
48 class Sequence;
49 class SequenceSet;
50 
51 // =================================================================================================
52 // Characteristics
53 // =================================================================================================
54 
63 utils::Bitvector gap_sites(
64  Sequence const& seq,
65  std::string const& gap_chars = nucleic_acid_codes_undetermined()
66 );
67 
77 utils::Bitvector gap_sites(
78  SequenceSet const& set,
79  std::string const& gap_chars = nucleic_acid_codes_undetermined()
80 );
81 
92 bool validate_chars( SequenceSet const& set, std::string const& chars );
93 
97 size_t longest_sequence_length( SequenceSet const& set );
98 
102 size_t total_length( SequenceSet const& set );
103 
107 bool is_alignment( SequenceSet const& set );
108 
109 // =================================================================================================
110 // Modifiers
111 // =================================================================================================
112 
121 void remove_sites( Sequence& seq, utils::Bitvector sites );
122 
133 void remove_sites( SequenceSet& set, utils::Bitvector sites );
134 
138 void remove_gap_sites( SequenceSet& set, std::string const& gap_chars = nucleic_acid_codes_undetermined() );
139 
143 void remove_characters( Sequence& seq, std::string const& search );
144 
149 void remove_characters( SequenceSet& set, std::string const& search );
150 
157 void remove_all_gaps( Sequence& seq, std::string const& gap_chars = nucleic_acid_codes_undetermined() );
158 
165 void remove_all_gaps( SequenceSet& set, std::string const& gap_chars = nucleic_acid_codes_undetermined() );
166 
175 void replace_characters( Sequence& seq, std::string const& search, char replacement );
176 
185 void replace_characters( SequenceSet& set, std::string const& search, char replacement );
186 
193 void replace_u_with_t( Sequence& seq );
194 
201 void replace_u_with_t( SequenceSet& set );
202 
209 void replace_t_with_u( Sequence& seq );
210 
217 void replace_t_with_u( SequenceSet& set );
218 
227 {
231  kDiscard,
232 
237 
242 };
243 
257  SequenceSet& set,
259  std::string const& counter_prefix = "_"
260 );
261 
262 // =================================================================================================
263 // Filters
264 // =================================================================================================
265 
272 void filter_min_sequence_length( SequenceSet& set, size_t min_length );
273 
280 void filter_max_sequence_length( SequenceSet& set, size_t max_length );
281 
289 void filter_min_max_sequence_length( SequenceSet& set, size_t min_length, size_t max_length );
290 
291 // =================================================================================================
292 // Print and Output
293 // =================================================================================================
294 
301 std::ostream& operator << ( std::ostream& out, Sequence const& seq );
302 
310 std::ostream& operator << ( std::ostream& out, SequenceSet const& set );
311 
312 } // namespace sequence
313 } // namespace genesis
314 
315 #endif // include guard
void remove_characters(Sequence &seq, std::string const &search)
Remove all of the characters in search from the sites of the Sequence.
The counts are appended to the sequence label, separated by the counter_prefix.
utils::Bitvector gap_sites(Sequence const &seq, std::string const &gap_chars)
Return a Bitvector that is true where the Sequence has a gap and false where not. ...
void filter_min_sequence_length(SequenceSet &set, size_t min_length)
Remove all Sequences from the SequenceSet whose length is below the given min_length.
The counts are appended to the sequence metadata, separated by the counter_prefix.
void remove_gap_sites(SequenceSet &set, std::string const &gap_chars)
Remove all sites that only contain gap characters from the SequenceSet.
void remove_sites(Sequence &seq, utils::Bitvector sites)
Remove all sites from a Sequence where the given Bitvector is true, and keep all others.
bool validate_chars(SequenceSet const &set, std::string const &chars)
Returns true iff all Sequences only consist of the given chars.
std::string nucleic_acid_codes_undetermined()
Return all undetermined nucleic acid codes. Those are "NOX.-?".
Definition: codes.cpp:303
void filter_max_sequence_length(SequenceSet &set, size_t max_length)
Remove all Sequences from the SequenceSet whose length is above the given max_length.
void replace_t_with_u(Sequence &seq)
Replace all occurrences of T by U in the sites of the Sequence.
void replace_u_with_t(Sequence &seq)
Replace all occurrences of U by T in the sites of the Sequence.
void filter_min_max_sequence_length(SequenceSet &set, size_t min_length, size_t max_length)
Remove all Sequences from the SequenceSet whose length is not in between the min_length and max_length...
std::ostream & operator<<(std::ostream &out, Sequence const &seq)
Print a Sequence to an ostream in the form "label: sites".
void merge_duplicate_sequences(SequenceSet &set, MergeDuplicateSequencesCountPolicy count_policy, std::string const &counter_prefix)
Merge all Sequences in a SequenceSet that have identical sites.
MergeDuplicateSequencesCountPolicy
Provide options for changing how merge_duplicate_sequences() handles the counts of merged Sequences...
size_t total_length(SequenceSet const &set)
Return the total length (sum) of all Sequences in the SequenceSet.
void remove_all_gaps(Sequence &seq, std::string const &gap_chars)
Remove all gap characters from the sites of the Sequence.
bool is_alignment(SequenceSet const &set)
Return true iff all Sequences in the SequenceSet have the same length.
size_t longest_sequence_length(SequenceSet const &set)
Return the length of the longest Sequence in the SequenceSet.
void replace_characters(Sequence &seq, std::string const &search, char replacement)
Replace all occurrences of the chars in search by the replace char, for all sites in the given Sequenc...