A library for working with phylogenetic and population genetic data.
v0.32.0
codes.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_CODES_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_CODES_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2023 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <map>
35 #include <string>
36 
37 // =================================================================================================
38 // Forward Declarations
39 // =================================================================================================
40 
41 namespace genesis {
42 namespace utils {
43 
 // Forward declaration of utils::Color. In this header it is only named as the mapped type
 // in the return types of nucleic_acid_colors() and amino_acid_colors() declared below.
44  class Color;
45 
46 }
47 }
48 
49 namespace genesis {
50 namespace sequence {
51 
52 // =================================================================================================
53 // Codes
54 // =================================================================================================
55 
56 // ---------------------------------------------------------------------
57 // Nucleic Acids
58 // ---------------------------------------------------------------------
59 
/**
 * @brief Return all plain nucleic acid codes. Those are `ACGTU`.
 */
63 std::string nucleic_acid_codes_plain();
64 
/**
 * @brief Return all degenerated nucleic acid codes. Those are `WSMKRYBDHV`.
 */
68 std::string nucleic_acid_codes_degenerated();
69 
74 
80 
// NOTE(review): the reference section of this listing also describes
// nucleic_acid_codes_undetermined() ("NOX.-?") and nucleic_acid_codes_undetermined_letters()
// ("NOX"), but no declaration for either is visible above (listing lines 73 and 79 are
// absent). Presumably an extraction artifact - confirm against the real header.

/**
 * @brief Return all valid nucleic acid codes. Those are `ACGTUWSMKRYBDHVNOX.-?`.
 */
84 std::string nucleic_acid_codes_all();
85 
/**
 * @brief Return all valid nucleic acid codes that are letters. Those are `ACGTUWSMKRYBDHVNOX`.
 *
 * NOTE(review): the reference description is truncated after "excluding"; presumably the
 * non-letter codes `.-?` returned by nucleic_acid_codes_all() are what is excluded - confirm.
 */
90 std::string nucleic_acid_codes_all_letters();
91 
92 // ---------------------------------------------------------------------
93 // Amino Acids
94 // ---------------------------------------------------------------------
95 
/**
 * @brief Return all plain amino acid codes. Those are `ACDEFGHIKLMNOPQRSTUVWY`.
 */
99 std::string amino_acid_codes_plain();
100 
/**
 * @brief Return all degenerated amino acid codes. Those are `BJZ`.
 */
104 std::string amino_acid_codes_degenerated();
105 
/**
 * @brief Return all undetermined amino acid codes. Per the reference text, those are `X*-?.`
 * (i.e., the list includes the `.` character).
 */
113 std::string amino_acid_codes_undetermined();
114 
/**
 * @brief Return all valid amino acid codes.
 *
 * Per the reference text, those are `ACDEFGHIKLMNOPQRSTUVWYBJZX*-?.`
 * NOTE(review): the reference description is truncated after this list - confirm the exact
 * set against codes.cpp.
 */
121 std::string amino_acid_codes_all();
122 
123 // ---------------------------------------------------------------------
124 // Misc
125 // ---------------------------------------------------------------------
126 
/**
 * @brief Normalize an alphabet set of Sequence codes, i.e., make them upper case and sort them.
 *
 * NOTE(review): the reference description is truncated after "sort them,"; there may be
 * further normalization steps - confirm in codes.cpp.
 *
 * @param alphabet The set of codes to normalize.
 * @return The normalized alphabet.
 */
134 std::string normalize_code_alphabet( std::string const& alphabet );
135 
/**
 * @brief Normalize a nucleic acid code.
 *
 * @param code               The code character to normalize.
 * @param accept_degenerated Defaults to `true`. Presumably controls whether degenerated codes
 *                           are accepted as input - confirm the exact behavior in codes.cpp.
 * @return The normalized code character.
 */
149 char normalize_nucleic_acid_code( char code, bool accept_degenerated = true );
150 
/**
 * @brief Normalize an amino acid code.
 *
 * @param code               The code character to normalize.
 * @param accept_degenerated Defaults to `true`. Presumably controls whether degenerated codes
 *                           are accepted as input - confirm the exact behavior in codes.cpp.
 * @return The normalized code character.
 */
164 char normalize_amino_acid_code( char code, bool accept_degenerated = true );
165 
/**
 * @brief Get the reverse complement of a nucleic acid sequence.
 *
 * @param sequence           The nucleic acid sequence, given as a string of codes.
 * @param accept_degenerated Defaults to `true`. Presumably controls whether degenerated codes
 *                           are accepted in the input - confirm the exact behavior in codes.cpp.
 * @return The reverse complement as a new string.
 */
176 std::string reverse_complement( std::string const& sequence, bool accept_degenerated = true );
177 
/**
 * @brief Return the transition base for the given base.
 *
 * @param code The nucleic acid code of the base.
 */
184 char nucleic_acid_transition( char code );
185 
/**
 * @brief Compare two nucleic acid codes and check if they are equal, taking
 * degenerated/ambiguous characters into account.
 *
 * NOTE(review): the reference description is truncated; the exact containment rule (e.g.,
 * whether the check is directional from @p a to @p b) must be confirmed in codes.cpp.
 *
 * @param a                        First nucleic acid code.
 * @param b                        Second nucleic acid code.
 * @param undetermined_matches_all Defaults to `true`. Presumably makes undetermined codes
 *                                 match any other code - confirm.
 */
195 bool nucleic_acid_code_containment( char a, char b, bool undetermined_matches_all = true );
196 
197 // =================================================================================================
198 // Color Codes
199 // =================================================================================================
200 
/**
 * @brief Return a map of text colors for each nucleic acid code.
 *
 * The map is keyed by the code character; each value is the text color as a string.
 */
207 std::map<char, std::string> nucleic_acid_text_colors();
208 
/**
 * @brief Return a map of text colors for each amino acid code.
 *
 * The map is keyed by the code character; each value is the text color as a string.
 */
215 std::map<char, std::string> amino_acid_text_colors();
216 
/**
 * @brief Return a map of Color%s for each nucleic acid code.
 *
 * The map is keyed by the code character; each value is a utils::Color.
 */
222 std::map<char, utils::Color> nucleic_acid_colors();
223 
/**
 * @brief Return a map of Color%s for each amino acid code.
 *
 * The map is keyed by the code character; each value is a utils::Color.
 */
229 std::map<char, utils::Color> amino_acid_colors();
230 
231 // =================================================================================================
232 // Code Names
233 // =================================================================================================
234 
/**
 * @brief Get the name of a nucleic acid given its IUPAC code.
 *
 * @param code The IUPAC code character.
 */
265 std::string nucleic_acid_name( char code );
266 
/**
 * @brief Get the name of an amino acid given its IUPAC code.
 *
 * @param code The IUPAC code character.
 */
305 std::string amino_acid_name( char code );
306 
/**
 * @brief Return the possible ambiguous nucleic acid codes for a given code char.
 *
 * @param code The nucleic acid code character to look up.
 */
344 std::string nucleic_acid_ambiguities( char code );
345 
/**
 * @brief Return the nucleic acid code that represents all given codes.
 *
 * @param codes The nucleic acid codes to combine; taken by value.
 */
377 char nucleic_acid_ambiguity_code( std::string codes );
378 
379 } // namespace sequence
380 } // namespace genesis
381 
382 #endif // include guard
genesis::sequence::amino_acid_codes_undetermined
std::string amino_acid_codes_undetermined()
Return all undetermined amino acid codes, plus the '.' character. Those are `X*-?.`
Definition: codes.cpp:343
genesis::sequence::nucleic_acid_codes_degenerated
std::string nucleic_acid_codes_degenerated()
Return all degenerated nucleic acid codes. Those are WSMKRYBDHV.
Definition: codes.cpp:300
genesis::sequence::nucleic_acid_code_containment
bool nucleic_acid_code_containment(char a, char b, bool undetermined_matches_all)
Compare two nucleic acid codes and check if they are equal, taking degenerated/ambiguous characters i...
Definition: codes.cpp:596
genesis::sequence::nucleic_acid_codes_plain
std::string nucleic_acid_codes_plain()
Return all plain nucleic acid codes. Those are ACGTU.
Definition: codes.cpp:295
genesis::sequence::nucleic_acid_codes_undetermined_letters
std::string nucleic_acid_codes_undetermined_letters()
Return all undetermined nucleic acid codes that are letters. Those are NOX, that is,...
Definition: codes.cpp:310
genesis::sequence::nucleic_acid_ambiguity_code
char nucleic_acid_ambiguity_code(std::string codes)
Return the nucleic acid code that represents all given codes.
Definition: codes.cpp:713
genesis::sequence::amino_acid_codes_degenerated
std::string amino_acid_codes_degenerated()
Return all degenerated amino acid codes. Those are BJZ.
Definition: codes.cpp:338
genesis::sequence::normalize_code_alphabet
std::string normalize_code_alphabet(std::string const &alphabet)
Normalize an alphabet set of Sequence codes, i.e., make them upper case, sort them,...
Definition: codes.cpp:359
genesis::sequence::normalize_nucleic_acid_code
char normalize_nucleic_acid_code(char code, bool accept_degenerated)
Normalize a nucleic acid code.
Definition: codes.cpp:368
genesis::sequence::nucleic_acid_codes_undetermined
std::string nucleic_acid_codes_undetermined()
Return all undetermined nucleic acid codes. Those are NOX.-?.
Definition: codes.cpp:305
genesis::sequence::nucleic_acid_transition
char nucleic_acid_transition(char code)
Return the transition base for the given base.
Definition: codes.cpp:572
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::nucleic_acid_codes_all_letters
std::string nucleic_acid_codes_all_letters()
Return all valid nucleic acid codes. Those are ACGTUWSMKRYBDHVNOX, that is, excluding ....
Definition: codes.cpp:322
genesis::sequence::nucleic_acid_text_colors
std::map< char, std::string > nucleic_acid_text_colors()
Return a map of text colors for each nucleic acid code.
Definition: codes.cpp:662
genesis::sequence::nucleic_acid_ambiguities
std::string nucleic_acid_ambiguities(char code)
Return the possible ambiguous nucleic acid codes for a given code char.
Definition: codes.cpp:704
genesis::sequence::reverse_complement
std::string reverse_complement(std::string const &sequence, bool accept_degenerated)
Get the reverse complement of a nucleic acid sequence.
Definition: codes.cpp:501
genesis::sequence::amino_acid_name
std::string amino_acid_name(char code)
Get the name of an amino acid given its IUPAC code.
Definition: codes.cpp:695
genesis::sequence::normalize_amino_acid_code
char normalize_amino_acid_code(char code, bool accept_degenerated)
Normalize an amino acid code.
Definition: codes.cpp:427
genesis::sequence::amino_acid_codes_all
std::string amino_acid_codes_all()
Return all valid amino acid codes, plus the '.' character. Those are `ACDEFGHIKLMNOPQRSTUVWYBJZX*-?.`...
Definition: codes.cpp:348
genesis::sequence::nucleic_acid_colors
std::map< char, utils::Color > nucleic_acid_colors()
Return a map of Colors for each nucleic acid code.
Definition: codes.cpp:672
genesis::sequence::amino_acid_text_colors
std::map< char, std::string > amino_acid_text_colors()
Return a map of text colors for each amino acid code.
Definition: codes.cpp:667
genesis::sequence::nucleic_acid_codes_all
std::string nucleic_acid_codes_all()
Return all valid nucleic acid codes. Those are ACGTUWSMKRYBDHVNOX.-?.
Definition: codes.cpp:315
genesis::sequence::amino_acid_codes_plain
std::string amino_acid_codes_plain()
Return all plain amino acid codes. Those are ACDEFGHIKLMNOPQRSTUVWY.
Definition: codes.cpp:333
genesis::sequence::nucleic_acid_name
std::string nucleic_acid_name(char code)
Get the name of a nucleic acid given its IUPAC code.
Definition: codes.cpp:686
genesis::sequence::amino_acid_colors
std::map< char, utils::Color > amino_acid_colors()
Return a map of Colors for each amino acid code.
Definition: codes.cpp:677