A toolkit for working with phylogenetic data.
v0.24.0
taxopath_parser.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
36 
37 #include <algorithm>
38 #include <cassert>
39 #include <stdexcept>
40 
41 namespace genesis {
42 namespace taxonomy {
43 
44 // =================================================================================================
45 // Parsing
46 // =================================================================================================
47 
48 Taxopath TaxopathParser::parse( std::string const& taxopath ) const
49 {
50  std::vector< std::string > elements;
51 
52  // Split the given string, while keeping empty parts.
53  elements = utils::split( taxopath, delimiters_, false );
54 
55  // If there are no elements, the string was empty. Nothing to do.
56  if( elements.size() == 0 ) {
57  assert( taxopath == "" );
58  return Taxopath();
59  }
60 
61  // Remove white spaces.
62  if( trim_whitespaces_ ) {
63  for( auto& r : elements ) {
64  r = utils::trim( r );
65  }
66  }
67 
68  // The first name in the list of sub-taxa must not be empty.
69  if( elements.front() == "" ) {
70  throw std::runtime_error( "Cannot use Taxopath if first Taxon is empty." );
71  }
72 
73  // The last name is ommited if empty.
74  if( remove_trailing_delimiter_ && elements.back() == "" ) {
75  elements.pop_back();
76  }
77 
78  // Resolve empty elements.
79  std::string prev_name;
80  for( size_t i = 0; i < elements.size(); ++i ) {
81  auto& name = elements[i];
82 
83  // If a sub-taxon is empty, use the super-taxon.
84  // As we previously checked that the first taxon is not empty, this is well-formed.
85  if( name == "" ) {
86  assert( i > 0 && prev_name != "" );
87  name = prev_name;
88  }
89 
90  prev_name = name;
91  }
92 
93  return Taxopath( elements );
94 }
95 
96 Taxopath TaxopathParser::parse( Taxon const& taxon ) const
97 {
98  // Start with an empty vector that will store the super-taxa of the given taxon.
99  std::vector<std::string> elements;
100 
101  // Add taxa in reverse order: the deepest taxon will be stored first.
102  // This is fast with a vector.
103  Taxon const* r = &taxon;
104  while( r != nullptr ) {
105  elements.push_back( r->name() );
106  r = r->parent();
107  }
108 
109  // Now reverse and return the result.
110  std::reverse( elements.begin(), elements.end() );
111  return Taxopath( elements );
112 }
113 
114 } // namespace taxonomy
115 } // namespace genesis
Taxon const * parent() const
Return a pointer to the parent of this taxon, or a nullptr if this is the top level taxon...
Definition: taxon.cpp:173
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::string const & name() const
Return the name of this taxon.
Definition: taxon.cpp:131
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:394
Store a Taxon, i.e., an element in a Taxonomy, with its name, rank, ID and sub-taxa.
Definition: taxon.hpp:76
std::vector< std::string > split(std::string const &str, std::string const &delimiters, const bool trim_empty)
Split a string into parts, given a set of delimiter chars.
Definition: string.cpp:178
Provides some commonly used string utility functions.
Helper class to store a taxonomic path.
Definition: taxopath.hpp:83
Taxopath parse(std::string const &taxopath) const
Parse a taxonomic path string into a Taxopath object and return it.