A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
taxopath_parser.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
36 
37 #include <algorithm>
38 #include <assert.h>
39 #include <stdexcept>
40 
41 namespace genesis {
42 namespace taxonomy {
43 
44 // =================================================================================================
45 // Parsing
46 // =================================================================================================
47 
53 Taxopath TaxopathParser::from_string( std::string const& taxopath ) const
54 {
55  std::vector< std::string > elements;
56 
57  // Split the given string, while keeping empty parts.
58  elements = utils::split( taxopath, delimiters_, false );
59 
60  // If there are no elements, the string was empty. Nothing to do.
61  if( elements.size() == 0 ) {
62  assert( taxopath == "" );
63  return Taxopath();
64  }
65 
66  // Remove white spaces.
67  if( trim_whitespaces_ ) {
68  for( auto& r : elements ) {
69  r = utils::trim( r );
70  }
71  }
72 
73  // The first name in the list of sub-taxa must not be empty.
74  if( elements.front() == "" ) {
75  throw std::runtime_error( "Cannot use Taxopath if first Taxon is empty." );
76  }
77 
78  // The last name is ommited if empty.
79  if( remove_trailing_delimiter_ && elements.back() == "" ) {
80  elements.pop_back();
81  }
82 
83  // Resolve empty elements.
84  std::string prev_name;
85  for( size_t i = 0; i < elements.size(); ++i ) {
86  auto& name = elements[i];
87 
88  // If a sub-taxon is empty, use the super-taxon.
89  // As we previously checked that the first taxon is not empty, this is well-formed.
90  if( name == "" ) {
91  assert( i > 0 && prev_name != "" );
92  name = prev_name;
93  }
94 
95  prev_name = name;
96  }
97 
98  return Taxopath( elements );
99 }
100 
106 Taxopath TaxopathParser::operator() ( std::string const& taxopath ) const
107 {
108  return from_string( taxopath );
109 }
110 
121 {
122  // Start with an empty vector that will store the super-taxa of the given taxon.
123  std::vector<std::string> elements;
124 
125  // Add taxa in reverse order: the deepest taxon will be stored first.
126  // This is fast with a vector.
127  Taxon const* r = &taxon;
128  while( r != nullptr ) {
129  elements.push_back( r->name() );
130  r = r->parent();
131  }
132 
133  // Now reverse and return the result.
134  std::reverse( elements.begin(), elements.end() );
135  return Taxopath( elements );
136 }
137 
144 {
145  return from_taxon( taxon );
146 }
147 
148 // =================================================================================================
149 // Properties
150 // =================================================================================================
151 
166 TaxopathParser& TaxopathParser::delimiters( std::string const& value )
167 {
168  delimiters_ = value;
169  return *this;
170 }
171 
177 std::string TaxopathParser::delimiters() const
178 {
179  return delimiters_;
180 }
181 
199 {
200  trim_whitespaces_ = value;
201  return *this;
202 }
203 
210 {
211  return trim_whitespaces_;
212 }
213 
226 {
227  remove_trailing_delimiter_ = value;
228  return *this;
229 }
230 
238 {
239  return remove_trailing_delimiter_;
240 }
241 
242 } // namespace taxonomy
243 } // namespace genesis
bool trim_whitespaces() const
Return the currently set value whether whitespaces are trimmed off the taxonomic elements.
Taxopath operator()(std::string const &taxopath) const
Shortcut function alias for from_string().
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:311
std::string const & name() const
Return the name of this taxon.
Definition: taxon.cpp:169
bool remove_trailing_delimiter() const
Return whether currently trailing delimiters are removed from the taxonomic path string.
std::vector< std::string > split(std::string const &string, std::function< size_t(std::string const &, size_t)> find_pos, size_t advance_by, const bool trim_empty)
Local function that does the work for the split functions.
Definition: string.cpp:136
std::string delimiters() const
Return the currently set delimiter chars used to split the taxonomic path string.
Store a Taxon, i.e., an element in a Taxonomy, with its name, rank and sub-taxa.
Definition: taxon.hpp:76
Provides some commonly used string utility functions.
Helper class to store a taxonomic path.
Definition: taxopath.hpp:81
Helper class to parse a string containing a taxonomic path string into a Taxopath object...
Taxopath from_string(std::string const &taxopath) const
Parse a taxonomic path string into a Taxopath object and return it.
Taxopath from_taxon(Taxon const &taxon) const
Helper function to turn a Taxon into a Taxopath.
Taxon const * parent() const
Return a pointer to the parent of this taxon, or a nullptr if this is the top level taxon...
Definition: taxon.cpp:209