A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
string.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
34 
35 #include <algorithm>
36 #include <cctype>
37 #include <iomanip>
38 #include <sstream>
39 #include <stdexcept>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // Compare
46 // =================================================================================================
47 
48 bool contains_ci( std::vector<std::string> const& haystack, std::string const& needle )
49 {
50  auto const l_needle = to_lower( needle );
51  for( auto const& val : haystack ) {
52  if( to_lower( val ) == l_needle ) {
53  return true;
54  }
55  }
56  return false;
57 }
58 
/**
 * @brief Compare two strings for equality, ignoring case.
 *
 * Characters are converted to `unsigned char` before being handed to `std::tolower()`,
 * because passing a negative `char` value (e.g. a non-ASCII byte on platforms where `char`
 * is signed) to the `<cctype>` functions is undefined behaviour.
 *
 * @param lhs First string.
 * @param rhs Second string.
 * @return `true` if both strings have the same length and all characters match
 *         case-insensitively, `false` otherwise.
 */
bool equals_ci( std::string const& lhs, std::string const& rhs)
{
    // Strings of different length can never be equal; this also makes the
    // three-iterator form of std::equal below safe.
    if( lhs.size() != rhs.size() ) {
        return false;
    }
    return std::equal(
        lhs.begin(), lhs.end(), rhs.begin(),
        []( char a, char b ){
            return std::tolower( static_cast<unsigned char>( a ))
                == std::tolower( static_cast<unsigned char>( b ));
        }
    );
}
72 
/**
 * @brief Return whether @p text begins with @p start.
 *
 * An empty @p start is a prefix of every string. The comparison is case-sensitive.
 */
bool starts_with( std::string const & text, std::string const & start )
{
    // A prefix cannot be longer than the text itself; after that check, compare
    // exactly the first start.size() characters.
    return start.size() <= text.size()
        && text.compare( 0, start.size(), start ) == 0;
}
80 
/**
 * @brief Return whether @p text ends with @p ending.
 *
 * An empty @p ending is a suffix of every string. The comparison is case-sensitive.
 */
bool ends_with( std::string const & text, std::string const & ending )
{
    // A suffix cannot be longer than the text itself.
    if( text.size() < ending.size() ) {
        return false;
    }

    // Compare the last ending.size() characters of the text against the ending.
    auto const offset = text.size() - ending.size();
    return text.compare( offset, ending.size(), ending ) == 0;
}
88 
89 // =================================================================================================
90 // Substrings
91 // =================================================================================================
92 
93 std::string head( std::string const& text, size_t lines )
94 {
95  // Not totally efficient, but works for now.
96  auto vec = split( text, "\n", false );
97  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
98  vec.erase( vec.end() - remove, vec.end() );
99  return join( vec, "\n" );
100 }
101 
102 std::string tail( std::string const& text, size_t lines )
103 {
104  // Not totally efficient, but works for now.
105  auto vec = split( text, "\n", false );
106  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
107  vec.erase( vec.begin(), vec.begin() + remove );
108  return join( vec, "\n" );
109 }
110 
111 // =================================================================================================
112 // Find and Count
113 // =================================================================================================
114 
/**
 * @brief Count how often @p sub occurs in @p str, including overlapping occurrences.
 *
 * @return The number of occurrences; `0` if @p sub is empty.
 */
size_t count_substring_occurrences( std::string const& str, std::string const& sub )
{
    // An empty needle is defined to have no occurrences.
    if( sub.empty() ) {
        return 0;
    }

    size_t hits = 0;
    size_t pos  = str.find( sub );
    while( pos != std::string::npos ) {
        ++hits;

        // Continue one character further, so that overlapping matches are counted as well.
        pos = str.find( sub, pos + 1 );
    }
    return hits;
}
132 
/**
 * @brief Worker function that does the actual splitting for the split functions.
 *
 * The string is scanned from left to right. @p find_pos is repeatedly asked for the position
 * of the next delimiter at or after the current position; the text in between becomes one
 * element of the result.
 *
 * @param string     Text to split.
 * @param find_pos   Callback `(text, start)` returning the position of the next delimiter at or
 *                   after `start`, or `std::string::npos` if there is none.
 * @param advance_by Number of characters to skip after a found delimiter position, i.e., the
 *                   length of the delimiter. Must not be `0` if a delimiter is ever found.
 * @param trim_empty If `true`, empty parts are left out of the result.
 * @return The parts of the string, in order.
 * @throws std::invalid_argument if a delimiter is found while @p advance_by is `0`. The scan
 *         position could then never move forward, and the loop would not terminate.
 */
std::vector<std::string> split (
    std::string const& string,
    std::function<size_t ( std::string const&, size_t )> find_pos,
    size_t advance_by,
    const bool trim_empty
) {
    std::vector<std::string> result;
    size_t last_pos = 0;

    while( true ) {
        // Find the next delimiter. If there is none, the current part extends to the end.
        size_t const found  = find_pos( string, last_pos );
        bool   const at_end = ( found == std::string::npos );
        size_t const pos    = at_end ? string.length() : found;

        // A zero-length delimiter would be found at the same position over and over again.
        if( !at_end && advance_by == 0 ) {
            throw std::invalid_argument(
                "Cannot split string using a delimiter of length zero."
            );
        }

        // Store the part between the previous delimiter and this one.
        if( pos != last_pos || !trim_empty ) {
            result.push_back( string.substr( last_pos, pos - last_pos ));
        }

        if( at_end ) {
            break;
        }
        last_pos = pos + advance_by;
    }

    return result;
}
173 
174 std::vector<std::string> split (
175  std::string const& string,
176  std::string const& delimiters,
177  const bool trim_empty
178 ) {
179  return split(
180  string,
181  [&]( std::string const& str, size_t last_pos ){
182  return str.find_first_of( delimiters, last_pos );
183  },
184  1,
185  trim_empty
186  );
187 }
188 
189 std::vector<std::string> split (
190  std::string const& string,
191  std::function<bool(char)> delimiter_predicate,
192  const bool trim_empty
193 ) {
194  return split(
195  string,
196  [&]( std::string const& str, size_t last_pos ){
197  // Find first matching char.
198  size_t pos = std::string::npos;
199  for( size_t i = last_pos; i < str.size(); ++i ) {
200  if( delimiter_predicate( str[i] ) ) {
201  pos = i;
202  break;
203  }
204  }
205  return pos;
206  },
207  1,
208  trim_empty
209  );
210 }
211 
212 std::vector<std::string> split_at (
213  std::string const& string,
214  std::string const& delimiter,
215  const bool trim_empty
216 ) {
217  return split(
218  string,
219  [&]( std::string const& str, size_t last_pos ){
220  return str.find( delimiter, last_pos );
221  },
222  delimiter.size(),
223  trim_empty
224  );
225 }
226 
227 // =================================================================================================
228 // Manipulate
229 // =================================================================================================
230 
231 std::string indent(
232  std::string const& text,
233  std::string const& indentation
234 ) {
235  auto ret = indentation + replace_all( text, "\n", "\n" + indentation );
236  return trim_right( ret, indentation );
237 }
238 
/**
 * @brief Return a copy of @p text in which all occurrences of @p search are replaced
 * by @p replace.
 *
 * The text is scanned from left to right. Replaced parts are not scanned again, so that a
 * @p replace string that itself contains @p search does not lead to repeated replacements.
 *
 * @param text    Input text; not modified.
 * @param search  String to look for. If empty, the text is returned unchanged, as an empty
 *                string "occurs" at every position and the search would never terminate.
 * @param replace String to put in place of each occurrence; may be empty.
 * @return The text with all replacements applied.
 */
std::string replace_all (
    std::string const& text, std::string const& search, std::string const& replace
) {
    // Nothing sensible to replace; also avoids an endless loop.
    if( search.empty() ) {
        return text;
    }

    std::string result = text;
    size_t pos = result.find( search );
    while( pos != std::string::npos ) {
        result.replace( pos, search.length(), replace );

        // Continue behind the inserted replacement.
        pos = result.find( search, pos + replace.length() );
    }
    return result;
}
255 
256 // inline version
257 /*
258 void replace_all(
259  std::string &s, const std::string &search, const std::string &replace
260 ) {
261  for (size_t pos = 0; ; pos += replace.length() ) {
262  pos = s.find(search, pos);
263 
264  if (pos == string::npos)
265  break;
266 
267  s.erase(pos, search.length());
268  s.insert(pos, replace);
269  }
270 }
271 */
272 
/**
 * @brief Return a copy of @p text in which every character that is contained in
 * @p search_chars is replaced by the character @p replace.
 */
std::string replace_all_chars (
    std::string const& text,
    std::string const& search_chars,
    char replace
) {
    std::string result( text );
    std::replace_if(
        result.begin(), result.end(),
        [&search_chars]( char c ){
            // Replace the character if it is one of the search characters.
            return search_chars.find( c ) != std::string::npos;
        },
        replace
    );
    return result;
}
286 
/**
 * @brief Return a copy of @p s with all trailing characters removed that are contained
 * in @p delimiters.
 *
 * If @p s consists of delimiter characters only, an empty string is returned.
 */
std::string trim_right (
    std::string const& s,
    std::string const& delimiters
) {
    // Position of the last character that has to be kept.
    auto const last_kept = s.find_last_not_of( delimiters );
    if( last_kept == std::string::npos ) {
        return std::string();
    }
    return s.substr( 0, last_kept + 1 );
}
298 
/**
 * @brief Return a copy of @p s with all leading characters removed that are contained
 * in @p delimiters.
 *
 * If @p s consists of delimiter characters only, an empty string is returned.
 */
std::string trim_left (
    std::string const& s,
    std::string const& delimiters
) {
    // Position of the first character that has to be kept.
    auto const first_kept = s.find_first_not_of( delimiters );
    if( first_kept == std::string::npos ) {
        return std::string();
    }
    return s.substr( first_kept );
}
310 
311 std::string trim (
312  std::string const& s,
313  std::string const& delimiters
314 ) {
315  return trim_left(trim_right(s, delimiters), delimiters);
316 }
317 
318 // =================================================================================================
319 // Normalize
320 // =================================================================================================
321 
/**
 * @brief Return a copy of @p text in which special characters are replaced by their
 * backslash escape sequences.
 *
 * The following replacements are made:
 *
 *   - carriage return becomes `\r`
 *   - new line becomes `\n`
 *   - tab becomes `\t`
 *   - double quote becomes `\"`
 *   - backslash becomes `\\`
 *
 * All other characters are copied as they are. The text is processed in a single pass, so
 * that the backslashes introduced by an escape sequence are not escaped a second time.
 */
std::string escape( std::string const& text )
{
    // At least as many characters as the input are needed.
    std::string result;
    result.reserve( text.size() );

    for( char const c : text ) {
        switch( c ) {
            case '\r' :
                result += "\\r";
                break;

            case '\n' :
                result += "\\n";
                break;

            case '\t' :
                result += "\\t";
                break;

            case '\"' :
                result += "\\\"";
                break;

            case '\\' :
                result += "\\\\";
                break;

            default :
                result += c;
        }
    }
    return result;
}
333 
334 std::string deescape( std::string const& text )
335 {
336  // Prepare a string that might be a bit too big, but saves reallocation.
337  std::string tmp;
338  tmp.reserve( text.size() );
339 
340  // Copy from text to tmp string, while deescaping.
341  for( size_t i = 0; i < text.size(); ++i ) {
342  if( text[ i ] == '\\' ) {
343  if( i + 1 >= text.size() ){
344  break;
345  }
346 
347  tmp += deescape( text[ i + 1 ] );
348  ++i;
349  } else {
350  tmp += text[ i ];
351  }
352  }
353  return tmp;
354 }
355 
/**
 * @brief Return the character that the escape sequence "backslash followed by @p c"
 * stands for.
 *
 * The characters `r`, `n` and `t` yield carriage return, new line and tab, respectively.
 * Every other character is returned unchanged.
 */
char deescape( char c )
{
    if( c == 'r' ) {
        return '\r';
    }
    if( c == 'n' ) {
        return '\n';
    }
    if( c == 't' ) {
        return '\t';
    }

    // Everything else, e.g., quotes and backslashes, stands for itself.
    return c;
}
372 
373 // =================================================================================================
374 // Output
375 // =================================================================================================
376 
/**
 * @brief Return a string that consists of @p times consecutive copies of @p word.
 */
std::string repeat( std::string const& word, size_t times )
{
    // The final size is known beforehand, so allocate just once.
    std::string result;
    result.reserve( times * word.length() );

    for( size_t remaining = times; remaining > 0; --remaining ) {
        result.append( word );
    }
    return result;
}
389 
/**
 * @brief Return a string representation of @p value that is padded with leading zeros to a
 * width of @p length characters.
 *
 * If the number has more than @p length digits, it is not shortened.
 */
std::string to_string_leading_zeros( size_t value, size_t length )
{
    std::ostringstream out;
    out << std::setfill( '0' ) << std::setw( length ) << value;
    return out.str();
}
396 
/**
 * @brief Return a string representation of @p value in fixed-point notation, with exactly
 * @p precision digits after the decimal point.
 */
std::string to_string_precise( double const value, int const precision )
{
    std::ostringstream out;

    // Fixed-point notation with the requested number of decimals.
    out.setf( std::ios_base::fixed, std::ios_base::floatfield );
    out.precision( precision );
    out << value;
    return out.str();
}
404 
/**
 * @brief Return a string representation of @p value, rounded to at most @p precision digits
 * after the decimal point, and without trailing zeros.
 *
 * The value is first printed in fixed-point notation using @p precision decimals. Then,
 * trailing zeros after the decimal point are removed. If only zeros follow the decimal point,
 * the decimal point is removed as well.
 *
 * If the printed value does not contain a decimal point at all (e.g., for a @p precision
 * of `0`), it is returned as is. Its trailing zeros are significant digits then, and must
 * not be removed.
 */
std::string to_string_rounded( double const value, int const precision )
{
    // Get fixed precision string.
    std::ostringstream s;
    s << std::fixed << std::setprecision( precision ) << value;
    auto str = s.str();

    // Without a decimal point, there are no decimals that could be truncated.
    if( str.find( '.' ) == std::string::npos ) {
        return str;
    }

    // The string contains a decimal point, so there is at least one non-zero character.
    size_t const last_nonzero = str.find_last_not_of( '0' );

    // Keep the last non-zero digit, unless it is the decimal point itself, which is dropped
    // along with the zeros that follow it.
    size_t const keep = ( str[ last_nonzero ] == '.' ) ? last_nonzero : last_nonzero + 1;
    str.erase( keep );
    return str;
}
422 
423 } // namespace utils
424 } // namespace genesis
void offset(Histogram &h, double value)
Definition: operations.cpp:47
size_t count_substring_occurrences(std::string const &str, std::string const &sub)
Return the number of (possibly overlapping) occurrences of a substring in a string.
Definition: string.cpp:115
std::string to_string_rounded(double const value, int const precision)
Return a string representation of the input value, using the provided precision value (determining it...
Definition: string.cpp:405
std::vector< std::string > split_at(std::string const &string, std::string const &delimiter, const bool trim_empty)
Split a string into parts, given a delimiter string.
Definition: string.cpp:212
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:73
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:311
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces.
Definition: string.cpp:287
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form...
Definition: string.cpp:334
std::string join(T const &v, std::string const &delimiter)
Return a string where the elements of a container v are joined using the string delimiter in between ...
Definition: string.hpp:391
std::string head(std::string const &text, size_t lines)
Return the first lines of the text.
Definition: string.cpp:93
std::string repeat(std::string const &word, size_t times)
Take a string and repeat it a given number of times.
Definition: string.cpp:377
std::string replace_all(std::string const &text, std::string const &search, std::string const &replace)
Return a copy of a string, where all occurrences of a search string are replaced by a replace string...
Definition: string.cpp:239
std::vector< std::string > split(std::string const &string, std::function< size_t(std::string const &, size_t)> find_pos, size_t advance_by, const bool trim_empty)
Local function that does the work for the split functions.
Definition: string.cpp:136
std::string to_lower(std::string const &str)
Return an all-lowercase copy of the given string, locale-aware.
Definition: string.hpp:198
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:273
std::string escape(std::string const &text)
Return a string where special chars are replaced by their escape sequence.
Definition: string.cpp:322
std::string tail(std::string const &text, size_t lines)
Return the last lines of the text.
Definition: string.cpp:102
std::string indent(std::string const &text, std::string const &indentation)
Indent each line of text with indentation and return the result.
Definition: string.cpp:231
bool equals_ci(std::string const &lhs, std::string const &rhs)
Compare two strings case insensitive.
Definition: string.cpp:59
std::string to_string_precise(double const value, int const precision)
Return a precise string representation of the input value, using the provided precision value (determ...
Definition: string.cpp:397
std::string to_string_leading_zeros(size_t value, size_t length)
Return a string representation of a size_t value with a fixed length, that is, by adding leading zero...
Definition: string.cpp:390
std::string trim_left(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with left trimmed white spaces.
Definition: string.cpp:299
Provides some commonly used string utility functions.
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:81
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
bool contains_ci(std::vector< std::string > const &haystack, std::string const &needle)
Return whether a vector of strings contains a given string, case insensitive.
Definition: string.cpp:48