A library for working with phylogenetic and population genetic data.
v0.32.0
ranks.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
34 
35 #include <unordered_map>
36 
37 namespace genesis {
38 namespace taxonomy {
39 
40 // =================================================================================================
41 // Ranks
42 // =================================================================================================
43 
// File-local lookup table for common taxonomic ranks.
// Key: one-letter, lower case abbreviation. Value: full rank name, capitalized.
static const std::unordered_map<char, std::string> rank_abbreviations{
    { 'd', "Domain"  },
    { 'k', "Kingdom" },
    { 'p', "Phylum"  },
    { 'c', "Class"   },
    { 'o', "Order"   },
    { 'f', "Family"  },
    { 'g', "Genus"   },
    { 's', "Species" }
};
57 
75 std::string rank_from_abbreviation( char r )
76 {
77  char const c = static_cast<char>( std::tolower( static_cast<unsigned char>( r )));
78  if( rank_abbreviations.count( c ) > 0 ) {
79  return rank_abbreviations.at( c );
80  } else {
81  return "";
82  }
83 }
84 
92 std::string rank_to_abbreviation( std::string const& rank )
93 {
94  auto r = utils::to_lower( rank );
95  for( auto const& p : rank_abbreviations ) {
96  if( utils::to_lower( p.second ) == r ) {
97  return std::string( 1, p.first );
98  }
99  }
100  return "";
101 }
102 
121 std::pair< std::string, std::string > resolve_rank_abbreviation( std::string const& entry )
122 {
123  std::string rank = "";
124  std::string name = entry;
125 
126  // Check whether the name is of the form "X_something".
127  // If so, use it to split off the rank name and resolve the abbreviation.
128  if( entry.size() >= 2 && entry[1] == '_' ) {
129  rank = rank_from_abbreviation( entry[0] );
130  }
131 
132  // If the previous step was successful and yielded a valid rank name,
133  // shorten the actual name accordingly.
134  if( rank != "" ) {
135  size_t pos = entry.find_first_not_of( "_", 1 );
136  name = entry.substr( pos );
137  }
138 
139  return { rank, name };
140 }
141 
142 } // namespace taxonomy
143 } // namespace genesis
genesis::taxonomy::rank_abbreviations
static const std::unordered_map< char, std::string > rank_abbreviations
Local helper data that stores the abbreviations and names of common taxonomic ranks.
Definition: ranks.cpp:47
genesis::taxonomy::resolve_rank_abbreviation
std::pair< std::string, std::string > resolve_rank_abbreviation(std::string const &entry)
Resolve a combined rank and name entry of the form "k_Bacteria" into the full rank and the name, i.e., "Kingdom" and "Bacteria".
Definition: ranks.cpp:121
ranks.hpp
string.hpp
Provides some commonly used string utility functions.
genesis::taxonomy::rank_to_abbreviation
std::string rank_to_abbreviation(std::string const &rank)
Get the abbreviation of a taxonomic rank name.
Definition: ranks.cpp:92
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::taxonomy::rank_from_abbreviation
std::string rank_from_abbreviation(char r)
Get the taxonomic rank name given its abbreviation.
Definition: ranks.cpp:75
genesis::utils::to_lower
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
Definition: char.hpp:221