A library for working with phylogenetic and population genetic data.
v0.27.0
codes.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_CODES_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_CODES_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <map>
35 #include <string>
36 
37 // =================================================================================================
38 // Forward Declarations
39 // =================================================================================================
40 
// Forward declaration of utils::Color. The color functions declared further down in this
// header name utils::Color inside their return types, and this header itself only includes
// <map> and <string>, so the type is introduced here by name only.
41 namespace genesis {
42 namespace utils {
43 
44  class Color;
45 
46 }
47 }
48 
49 namespace genesis {
50 namespace sequence {
51 
52 // =================================================================================================
53 // Codes
54 // =================================================================================================
55 
56 // ---------------------------------------------------------------------
57 // Nucleic Acids
58 // ---------------------------------------------------------------------
59 
/// @brief Return all plain nucleic acid codes. Those are `ACGTU`.
63 std::string nucleic_acid_codes_plain();
64 
/// @brief Return all degenerated nucleic acid codes. Those are `WSMKRYBDHV`.
68 std::string nucleic_acid_codes_degenerated();
69 
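73 std::string nucleic_acid_codes_undetermined();
74 
79 std::string nucleic_acid_codes_undetermined_letters();
80 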
/// @brief Return all valid nucleic acid codes. Those are `ACGTUWSMKRYBDHVNOX.-?`.
84 std::string nucleic_acid_codes_all();
85 
/// @brief Return all valid nucleic acid codes that are letters. Those are `ACGTUWSMKRYBDHVNOX`,
/// i.e., the set returned by nucleic_acid_codes_all() without the non-letter codes `.-?`.
90 std::string nucleic_acid_codes_all_letters();
91 
92 // ---------------------------------------------------------------------
93 // Amino Acids
94 // ---------------------------------------------------------------------
95 
/// @brief Return all plain amino acid codes. Those are `ACDEFGHIKLMNOPQRSTUVWY`.
99 std::string amino_acid_codes_plain();
100 
/// @brief Return all degenerated amino acid codes. Those are `BJZ`.
104 std::string amino_acid_codes_degenerated();
105 
/// @brief Return all undetermined amino acid codes. Those are `X*-?.`
/// (the trailing `.` is itself one of the codes, not punctuation).
113 std::string amino_acid_codes_undetermined();
114 
/// @brief Return all valid amino acid codes.
///
/// The generated summary lists them as `ACDEFGHIKLMNOPQRSTUVWYBJZX*-?.`, which is the
/// concatenation of the plain (`ACDEFGHIKLMNOPQRSTUVWY`), degenerated (`BJZ`) and
/// undetermined (`X*-?.`) code sets.
/// NOTE(review): the summary text is truncated right after that string; confirm against
/// codes.cpp that no further codes follow.
121 std::string amino_acid_codes_all();
122 
123 // ---------------------------------------------------------------------
124 // Misc
125 // ---------------------------------------------------------------------
126 
/// @brief Normalize an alphabet set of Sequence codes, i.e., make them upper case and sort them.
///
/// NOTE(review): the generated summary is truncated after "sort them,"; there may be further
/// normalization steps. Confirm in codes.cpp.
///
/// @param alphabet Set of codes to normalize.
/// @return The normalized alphabet.
134 std::string normalize_code_alphabet( std::string const& alphabet );
135 
/// @brief Normalize a nucleic acid code.
///
/// @param code               The code char to normalize.
/// @param accept_degenerated Defaults to `true`. NOTE(review): presumably controls whether
///                           degenerated codes are accepted as input; the exact behavior when
///                           `false` is not visible here, confirm in codes.cpp.
/// @return The normalized code char.
149 char normalize_nucleic_acid_code( char code, bool accept_degenerated = true );
150 
/// @brief Normalize an amino acid code.
///
/// @param code               The code char to normalize.
/// @param accept_degenerated Defaults to `true`. NOTE(review): presumably controls whether
///                           degenerated codes are accepted as input; the exact behavior when
///                           `false` is not visible here, confirm in codes.cpp.
/// @return The normalized code char.
164 char normalize_amino_acid_code( char code, bool accept_degenerated = true );
165 
/// @brief Get the reverse complement of a nucleic acid sequence.
///
/// @param sequence           The nucleic acid sequence, given as a string of codes.
/// @param accept_degenerated Defaults to `true`. NOTE(review): presumably controls whether
///                           degenerated codes are allowed in @p sequence; confirm in codes.cpp.
/// @return The reverse complement of @p sequence.
176 std::string reverse_complement( std::string const& sequence, bool accept_degenerated = true );
177 
/// @brief Compare two nucleic acid codes and check if they are equal, taking
/// degenerated/ambiguous characters into account.
///
/// NOTE(review): the generated summary is truncated after "characters i..."; the direction of
/// the containment test (a in b, or b in a) is not visible here. Confirm in codes.cpp.
///
/// @param a                        First nucleic acid code.
/// @param b                        Second nucleic acid code.
/// @param undetermined_matches_all Defaults to `true`. NOTE(review): presumably makes
///                                 undetermined codes match any other code; confirm.
187 bool nucleic_acid_code_containment( char a, char b, bool undetermined_matches_all = true );
188 
189 // =================================================================================================
190 // Color Codes
191 // =================================================================================================
192 
/// @brief Return a map of text colors for each nucleic acid code.
///
/// The map is keyed by code char; each value is a color given as a string.
199 std::map<char, std::string> nucleic_acid_text_colors();
200 
/// @brief Return a map of text colors for each amino acid code.
///
/// The map is keyed by code char; each value is a color given as a string.
207 std::map<char, std::string> amino_acid_text_colors();
208 
/// @brief Return a map of Colors for each nucleic acid code.
///
/// The map is keyed by code char; each value is a utils::Color.
214 std::map<char, utils::Color> nucleic_acid_colors();
215 
/// @brief Return a map of Colors for each amino acid code.
///
/// The map is keyed by code char; each value is a utils::Color.
221 std::map<char, utils::Color> amino_acid_colors();
222 
223 // =================================================================================================
224 // Code Names
225 // =================================================================================================
226 
/// @brief Get the name of a nucleic acid given its IUPAC code.
///
/// @param code IUPAC code char of the nucleic acid.
/// @return The name belonging to @p code.
257 std::string nucleic_acid_name( char code );
258 
/// @brief Get the name of an amino acid given its IUPAC code.
///
/// @param code IUPAC code char of the amino acid.
/// @return The name belonging to @p code.
297 std::string amino_acid_name( char code );
298 
/// @brief Return the possible ambiguous nucleic acid codes for a given code char.
///
/// @param code Nucleic acid code char to look up.
/// @return The possible codes for @p code, as a string.
336 std::string nucleic_acid_ambiguities( char code );
337 
/// @brief Return the nucleic acid code that represents all given codes.
///
/// @param codes The nucleic acid codes to be represented, as a string. Taken by value.
/// @return The single code char that represents all of @p codes.
369 char nucleic_acid_ambiguity_code( std::string codes );
370 
371 } // namespace sequence
372 } // namespace genesis
373 
374 #endif // include guard
genesis::sequence::amino_acid_codes_undetermined
std::string amino_acid_codes_undetermined()
Return all undetermined amino acid codes, and `.`. Those are `X*-?.`.
Definition: codes.cpp:343
genesis::sequence::nucleic_acid_codes_degenerated
std::string nucleic_acid_codes_degenerated()
Return all degenerated nucleic acid codes. Those are WSMKRYBDHV.
Definition: codes.cpp:300
genesis::sequence::nucleic_acid_code_containment
bool nucleic_acid_code_containment(char a, char b, bool undetermined_matches_all)
Compare two nucleic acid codes and check if they are equal, taking degenerated/ambiguous characters i...
Definition: codes.cpp:572
genesis::sequence::nucleic_acid_codes_plain
std::string nucleic_acid_codes_plain()
Return all plain nucleic acid codes. Those are ACGTU.
Definition: codes.cpp:295
genesis::sequence::nucleic_acid_codes_undetermined_letters
std::string nucleic_acid_codes_undetermined_letters()
Return all undetermined nucleic acid codes that are letters. Those are NOX, that is,...
Definition: codes.cpp:310
genesis::sequence::nucleic_acid_ambiguity_code
char nucleic_acid_ambiguity_code(std::string codes)
Return the nucleic acid code that represents all given codes.
Definition: codes.cpp:689
genesis::sequence::amino_acid_codes_degenerated
std::string amino_acid_codes_degenerated()
Return all degenerated amino acid codes. Those are BJZ.
Definition: codes.cpp:338
genesis::sequence::normalize_code_alphabet
std::string normalize_code_alphabet(std::string const &alphabet)
Normalize an alphabet set of Sequence codes, i.e., make them upper case, sort them,...
Definition: codes.cpp:359
genesis::sequence::normalize_nucleic_acid_code
char normalize_nucleic_acid_code(char code, bool accept_degenerated)
Normalize a nucleic acid code.
Definition: codes.cpp:368
genesis::sequence::nucleic_acid_codes_undetermined
std::string nucleic_acid_codes_undetermined()
Return all undetermined nucleic acid codes. Those are `NOX.-?`.
Definition: codes.cpp:305
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::nucleic_acid_codes_all_letters
std::string nucleic_acid_codes_all_letters()
Return all valid nucleic acid codes. Those are `ACGTUWSMKRYBDHVNOX`, that is, excluding `.-?`...
Definition: codes.cpp:322
genesis::sequence::nucleic_acid_text_colors
std::map< char, std::string > nucleic_acid_text_colors()
Return a map of text colors for each nucleic acid code.
Definition: codes.cpp:638
genesis::sequence::nucleic_acid_ambiguities
std::string nucleic_acid_ambiguities(char code)
Return the possible ambiguous nucleic acid codes for a given code char.
Definition: codes.cpp:680
genesis::sequence::reverse_complement
std::string reverse_complement(std::string const &sequence, bool accept_degenerated)
Get the reverse complement of a nucleic acid sequence.
Definition: codes.cpp:501
genesis::sequence::amino_acid_name
std::string amino_acid_name(char code)
Get the name of an amino acid given its IUPAC code.
Definition: codes.cpp:671
genesis::sequence::normalize_amino_acid_code
char normalize_amino_acid_code(char code, bool accept_degenerated)
Normalize an amino acid code.
Definition: codes.cpp:427
genesis::sequence::amino_acid_codes_all
std::string amino_acid_codes_all()
Return all valid amino acid codes, and `.`. Those are `ACDEFGHIKLMNOPQRSTUVWYBJZX*-?.`...
Definition: codes.cpp:348
genesis::sequence::nucleic_acid_colors
std::map< char, utils::Color > nucleic_acid_colors()
Return a map of Colors for each nucleic acid code.
Definition: codes.cpp:648
genesis::sequence::amino_acid_text_colors
std::map< char, std::string > amino_acid_text_colors()
Return a map of text colors for each amino acid code.
Definition: codes.cpp:643
genesis::sequence::nucleic_acid_codes_all
std::string nucleic_acid_codes_all()
Return all valid nucleic acid codes. Those are `ACGTUWSMKRYBDHVNOX.-?`.
Definition: codes.cpp:315
genesis::sequence::amino_acid_codes_plain
std::string amino_acid_codes_plain()
Return all plain amino acid codes. Those are ACDEFGHIKLMNOPQRSTUVWY.
Definition: codes.cpp:333
genesis::sequence::nucleic_acid_name
std::string nucleic_acid_name(char code)
Get the name of a nucleic acid given its IUPAC code.
Definition: codes.cpp:662
genesis::sequence::amino_acid_colors
std::map< char, utils::Color > amino_acid_colors()
Return a map of Colors for each amino acid code.
Definition: codes.cpp:653