A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
string.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
33 
34 #include <algorithm>
35 #include <cctype>
36 #include <iomanip>
37 #include <sstream>
38 #include <stdexcept>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Compare
45 // =================================================================================================
46 
/**
 * @brief Compare two strings case insensitively.
 *
 * Strings of different length are never equal. Otherwise, the strings are compared char by char,
 * after converting each char to lower case via `std::tolower`.
 *
 * @param lhs First string.
 * @param rhs Second string.
 * @return `true` iff both strings have the same length and all their chars match when lowercased.
 */
bool equals_ci( std::string const& lhs, std::string const& rhs)
{
    if( lhs.size() != rhs.size() ) {
        return false;
    }

    // std::tolower takes an int that has to be representable as unsigned char (or be EOF).
    // Passing a negative plain char is undefined behaviour, hence the casts.
    return std::equal(
        lhs.begin(), lhs.end(), rhs.begin(),
        []( char l, char r ) {
            return std::tolower( static_cast<unsigned char>( l ))
                == std::tolower( static_cast<unsigned char>( r ));
        }
    );
}
60 
/**
 * @brief Return whether `text` begins with `start`.
 *
 * A `start` that is longer than `text` can never be a prefix. An empty `start` is a prefix of
 * every string.
 */
bool starts_with( std::string const & text, std::string const & start )
{
    // Compare only the leading part of the text that has the same length as the prefix.
    return text.size() >= start.size()
        && text.compare( 0, start.size(), start ) == 0;
}
68 
/**
 * @brief Return whether `text` finishes with `ending`.
 *
 * An `ending` that is longer than `text` can never be a suffix. An empty `ending` is a suffix of
 * every string.
 */
bool ends_with( std::string const & text, std::string const & ending )
{
    if( text.size() < ending.size() ) {
        return false;
    }

    // Compare only the trailing part of the text that has the same length as the suffix.
    size_t const offset = text.size() - ending.size();
    return text.compare( offset, ending.size(), ending ) == 0;
}
76 
77 // =================================================================================================
78 // Substrings
79 // =================================================================================================
80 
81 std::string head( std::string const& text, size_t lines )
82 {
83  // Not totally efficient, but works for now.
84  auto vec = split( text, "\n", false );
85  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
86  vec.erase( vec.end() - remove, vec.end() );
87  return join( vec, "\n" );
88 }
89 
90 std::string tail( std::string const& text, size_t lines )
91 {
92  // Not totally efficient, but works for now.
93  auto vec = split( text, "\n", false );
94  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
95  vec.erase( vec.begin(), vec.begin() + remove );
96  return join( vec, "\n" );
97 }
98 
99 // =================================================================================================
100 // Find and Count
101 // =================================================================================================
102 
/**
 * @brief Count how often `sub` occurs in `str`, including overlapping occurrences.
 *
 * An empty `sub` yields a count of 0.
 */
size_t count_substring_occurrences( std::string const& str, std::string const& sub )
{
    if( sub.empty() ) {
        return 0;
    }

    size_t hits = 0;
    size_t pos  = str.find( sub );
    while( pos != std::string::npos ) {
        ++hits;

        // Continue one char after the start of the current hit, so that overlaps are found.
        pos = str.find( sub, pos + 1 );
    }
    return hits;
}
120 
/**
 * @brief Split a string into parts, at any of the chars given in `delimiters`.
 *
 * @param str        String to split.
 * @param delimiters Set of chars; each of them on its own acts as a separator.
 * @param trim_empty If `true`, empty parts (e.g., resulting from adjacent delimiters) are left out
 *                   of the result. If `false`, they are kept as empty strings.
 * @return The parts of the string, in order of appearance, without the delimiter chars.
 */
std::vector<std::string> split (
    std::string const& str,
    std::string const& delimiters,
    const bool         trim_empty
) {
    std::vector<std::string> parts;
    size_t begin = 0;

    for( ;; ) {
        // Find the next delimiter. If there is none, the current part runs until the string end.
        size_t const hit = str.find_first_of( delimiters, begin );
        bool   const at_end = ( hit == std::string::npos );
        size_t const end = at_end ? str.length() : hit;

        if( !trim_empty || end != begin ) {
            parts.push_back( str.substr( begin, end - begin ));
        }
        if( at_end ) {
            break;
        }

        // Skip the delimiter char itself.
        begin = end + 1;
    }

    return parts;
}
153 
154 // =================================================================================================
155 // Manipulate
156 // =================================================================================================
157 
158 std::string indent(
159  std::string const& text,
160  std::string const& indentation
161 ) {
162  auto ret = indentation + replace_all( text, "\n", "\n" + indentation );
163  return trim_right( ret, indentation );
164 }
165 
/**
 * @brief Return a copy of `text` in which all occurrences of `search` are replaced by `replace`.
 *
 * The text is scanned from left to right. After each replacement, scanning continues behind the
 * inserted string, so that the replacement itself is never searched again.
 *
 * @param text    Input text, which is not modified.
 * @param search  String to look for. If it is empty, there is nothing to replace, and an
 *                unchanged copy of `text` is returned.
 * @param replace String to put in place of each occurrence of `search`. Can be empty.
 * @return The text with all replacements applied.
 */
std::string replace_all (
    std::string const& text, std::string const& search, std::string const& replace
) {
    // An empty search string matches at every position, which would never terminate.
    if( search.empty() ) {
        return text;
    }

    std::string result = text;
    size_t pos = result.find( search );
    while( pos != std::string::npos ) {
        result.replace( pos, search.length(), replace );

        // Skip the part that was just inserted.
        pos = result.find( search, pos + replace.length() );
    }
    return result;
}
182 
183 // inline version
184 /*
185 void replace_all(
186  std::string &s, const std::string &search, const std::string &replace
187 ) {
188  for (size_t pos = 0; ; pos += replace.length() ) {
189  pos = s.find(search, pos);
190 
191  if (pos == string::npos)
192  break;
193 
194  s.erase(pos, search.length());
195  s.insert(pos, replace);
196  }
197 }
198 */
199 
/**
 * @brief Return a copy of `text` in which every char that occurs in `search_chars` is replaced
 * by the `replace` char.
 */
std::string replace_all_chars (
    std::string const& text,
    std::string const& search_chars,
    char               replace
) {
    std::string result = text;

    // Jump from one matching char to the next, overwriting each of them.
    size_t pos = result.find_first_of( search_chars );
    while( pos != std::string::npos ) {
        result[ pos ] = replace;
        pos = result.find_first_of( search_chars, pos + 1 );
    }
    return result;
}
213 
/**
 * @brief Return a copy of `s` where all chars contained in `delimiters` are removed from the
 * end of the string.
 *
 * If the string consists of delimiter chars only, an empty string is returned.
 */
std::string trim_right (
    std::string const& s,
    std::string const& delimiters
) {
    // Position of the last char that is to be kept.
    auto const last_kept = s.find_last_not_of( delimiters );
    return ( last_kept == std::string::npos )
        ? std::string()
        : s.substr( 0, last_kept + 1 );
}
225 
/**
 * @brief Return a copy of `s` where all chars contained in `delimiters` are removed from the
 * beginning of the string.
 *
 * If the string consists of delimiter chars only, an empty string is returned.
 */
std::string trim_left (
    std::string const& s,
    std::string const& delimiters
) {
    // Position of the first char that is to be kept.
    auto const first_kept = s.find_first_not_of( delimiters );
    return ( first_kept == std::string::npos )
        ? std::string()
        : s.substr( first_kept );
}
237 
238 std::string trim (
239  std::string const& s,
240  std::string const& delimiters
241 ) {
242  return trim_left(trim_right(s, delimiters), delimiters);
243 }
244 
245 // =================================================================================================
246 // Normalize
247 // =================================================================================================
248 
/**
 * @brief Return an all-lowercase copy of the given string.
 *
 * Each char is converted via `std::tolower`, which uses the currently installed C locale.
 *
 * @param str Input string, which is not modified.
 * @return Copy of the string with each char converted to lower case.
 */
std::string to_lower( std::string const& str )
{
    auto res = str;
    for( auto& c : res ){
        // std::tolower requires a value representable as unsigned char (or EOF). Passing a
        // negative plain char is undefined behaviour, hence the cast.
        c = static_cast<char>( std::tolower( static_cast<unsigned char>( c )));
    }
    return res;
}
257 
/**
 * @brief Turn the given string to all-lowercase, in place.
 *
 * Each char is converted via `std::tolower`, which uses the currently installed C locale.
 *
 * @param str String to be modified.
 */
void to_lower_inplace( std::string& str )
{
    for( auto& c : str ){
        // std::tolower requires a value representable as unsigned char (or EOF). Passing a
        // negative plain char is undefined behaviour, hence the cast.
        c = static_cast<char>( std::tolower( static_cast<unsigned char>( c )));
    }
}
264 
/**
 * @brief Return an all-uppercase copy of the given string.
 *
 * Each char is converted via `std::toupper`, which uses the currently installed C locale.
 *
 * @param str Input string, which is not modified.
 * @return Copy of the string with each char converted to upper case.
 */
std::string to_upper( std::string const& str )
{
    auto res = str;
    for( auto& c : res ){
        // std::toupper requires a value representable as unsigned char (or EOF). Passing a
        // negative plain char is undefined behaviour, hence the cast.
        c = static_cast<char>( std::toupper( static_cast<unsigned char>( c )));
    }
    return res;
}
273 
/**
 * @brief Turn the given string to all-uppercase, in place.
 *
 * Each char is converted via `std::toupper`, which uses the currently installed C locale.
 *
 * @param str String to be modified.
 */
void to_upper_inplace( std::string& str )
{
    for( auto& c : str ){
        // std::toupper requires a value representable as unsigned char (or EOF). Passing a
        // negative plain char is undefined behaviour, hence the cast.
        c = static_cast<char>( std::toupper( static_cast<unsigned char>( c )));
    }
}
280 
/**
 * @brief Return the lower case version of a char, ASCII-only.
 *
 * Chars in the range `A` to `Z` are shifted by 0x20 to their lower case counterpart.
 * All other chars are returned unchanged.
 */
char to_lower_ascii( char c )
{
    if( c >= 'A' && c <= 'Z' ) {
        return static_cast<char>( c + 0x20 );
    }
    return c;
}
285 
/**
 * @brief Return the upper case version of a char, ASCII-only.
 *
 * Chars in the range `a` to `z` are shifted by 0x20 to their upper case counterpart.
 * All other chars are returned unchanged.
 */
char to_upper_ascii( char c )
{
    if( c >= 'a' && c <= 'z' ) {
        return static_cast<char>( c - 0x20 );
    }
    return c;
}
290 
291 std::string to_lower_ascii( std::string const& str )
292 {
293  auto res = str;
294  for( auto& c : res ){
295  c = to_lower_ascii(c);
296  }
297  return res;
298 }
299 
300 void to_lower_ascii_inplace( std::string& str )
301 {
302  for( auto& c : str ){
303  c = to_lower_ascii(c);
304  }
305 }
306 
307 std::string to_upper_ascii( std::string const& str )
308 {
309  auto res = str;
310  for( auto& c : res ){
311  c = to_upper_ascii(c);
312  }
313  return res;
314 }
315 
316 void to_upper_ascii_inplace( std::string& str )
317 {
318  for( auto& c : str ){
319  c = to_upper_ascii(c);
320  }
321 }
322 
/**
 * @brief Return a string where special chars are replaced by their escape sequence.
 *
 * The following chars are escaped by putting a backslash in front of them:
 * carriage return (as `\r`), new line (as `\n`), tab (as `\t`), double quotation mark (as `\"`),
 * and the backslash itself (as `\\`). All other chars are copied as they are.
 *
 * The text is processed in a single pass, so that backslashes which are introduced by the
 * escaping are not escaped a second time.
 *
 * @param text Input text, which is not modified.
 * @return The escaped text.
 */
std::string escape( std::string const& text )
{
    // The result is at least as long as the input.
    std::string result;
    result.reserve( text.size() );

    for( char const c : text ) {
        switch( c ) {
            case '\r':
                result += "\\r";
                break;

            case '\n':
                result += "\\n";
                break;

            case '\t':
                result += "\\t";
                break;

            case '\"':
                result += "\\\"";
                break;

            case '\\':
                result += "\\\\";
                break;

            default:
                result += c;
        }
    }
    return result;
}
334 
335 std::string deescape( std::string const& text )
336 {
337  // Prepare a string that might be a bit too big, but saves reallocation.
338  std::string tmp;
339  tmp.reserve( text.size() );
340 
341  // Copy from text to tmp string, while deescaping.
342  for( size_t i = 0; i < text.size(); ++i ) {
343  if( text[ i ] == '\\' ) {
344  if( i + 1 >= text.size() ){
345  break;
346  }
347 
348  tmp += deescape( text[ i + 1 ] );
349  ++i;
350  } else {
351  tmp += text[ i ];
352  }
353  }
354  return tmp;
355 }
356 
/**
 * @brief Return the char that is represented by an escape sequence consisting of a backslash
 * followed by the given char.
 *
 * The chars `r`, `n` and `t` are turned into carriage return, new line and tab, respectively.
 * Every other char is returned unchanged.
 */
char deescape( char c )
{
    if( c == 'r' ) {
        return '\r';
    }
    if( c == 'n' ) {
        return '\n';
    }
    if( c == 't' ) {
        return '\t';
    }
    return c;
}
373 
374 // =================================================================================================
375 // Output
376 // =================================================================================================
377 
/**
 * @brief Take a string and repeat it a given number of times.
 *
 * @param word  String to repeat.
 * @param times Number of repetitions. For 0, the result is an empty string.
 */
std::string repeat( std::string const& word, size_t times )
{
    // Reserve the full size up front, in order to avoid repeated reallocation.
    std::string result;
    result.reserve( times * word.length() );

    for( size_t remaining = times; remaining > 0; --remaining ) {
        result.append( word );
    }
    return result;
}
390 
/**
 * @brief Return a string representation of `value` that is padded with leading zeros to a width
 * of `length` chars.
 *
 * The `length` is used as the minimum field width of the output stream. Values that need more
 * chars than that are written in full, without being cut off.
 */
std::string to_string_leading_zeros( size_t value, size_t length )
{
    std::ostringstream out;
    out << std::setfill( '0' ) << std::setw( length );
    out << value;
    return out.str();
}
397 
/**
 * @brief Return a string representation of `value` in fixed notation, using `precision` as the
 * number of digits after the decimal point.
 *
 * Trailing zeros are kept, so that the result always has the given number of decimal digits.
 */
std::string to_string_precise( double const value, int const precision )
{
    std::ostringstream out;
    out.setf( std::ios_base::fixed, std::ios_base::floatfield );
    out.precision( precision );
    out << value;
    return out.str();
}
405 
/**
 * @brief Return a string representation of `value` in fixed notation with at most `precision`
 * digits after the decimal point, with trailing zeros removed.
 *
 * The value is first printed with exactly `precision` decimal digits. Then, trailing zeros after
 * the decimal point are cut off. If only zeros follow the decimal point, the decimal point is
 * removed as well.
 *
 * Strings without a decimal point (for example, with a `precision` of 0) are returned as they
 * are, so that the zeros of the integer part are never removed.
 *
 * @param value     Value to print.
 * @param precision Maximum number of digits after the decimal point.
 * @return The string representation of the value.
 */
std::string to_string_rounded( double const value, int const precision )
{
    // Get fixed precision string.
    std::ostringstream s;
    s << std::fixed << std::setprecision( precision ) << value;
    auto str = s.str();

    // Without a decimal point, all zeros belong to the integer part and have to stay.
    if( str.find( '.' ) == std::string::npos ) {
        return str;
    }

    // Truncate trailing zeros. The decimal point is not a zero, so the search always succeeds.
    // If there are only zeros after the decimal point, also delete the decimal point.
    size_t const last_nonzero = str.find_last_not_of( '0' );
    size_t const keep = ( str[ last_nonzero ] == '.' ) ? last_nonzero : last_nonzero + 1;
    str.erase( keep );
    return str;
}
423 
424 } // namespace utils
425 } // namespace genesis
void offset(Histogram &h, double value)
Definition: operations.cpp:47
size_t count_substring_occurrences(std::string const &str, std::string const &sub)
Return the number of (possibly overlapping) occurrences of a substring in a string.
Definition: string.cpp:103
std::string to_string_rounded(double const value, int const precision)
Return a string representation of the input value, using the provided precision value (determining it...
Definition: string.cpp:406
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:61
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:238
void to_upper_ascii_inplace(std::string &str)
Turn the given string to all-uppercase, ascii-only.
Definition: string.cpp:316
std::vector< std::string > split(std::string const &str, std::string const &delimiters, const bool trim_empty)
Split a string into parts, given a set of delimiter chars.
Definition: string.cpp:121
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces.
Definition: string.cpp:214
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form...
Definition: string.cpp:335
std::string join(T const &v, std::string const &delimiter)
Return a string where the elements of a container v are joined using the string delimiter in between ...
Definition: string.hpp:318
std::string to_upper(std::string const &str)
Return an all-uppercase copy of the given string, locale-aware.
Definition: string.cpp:265
std::string head(std::string const &text, size_t lines)
Return the first lines of the text.
Definition: string.cpp:81
std::string repeat(std::string const &word, size_t times)
Take a string and repeat it a given number of times.
Definition: string.cpp:378
std::string replace_all(std::string const &text, std::string const &search, std::string const &replace)
Return a copy of a string, where all occurrences of a search string are replaced by a replace string...
Definition: string.cpp:166
char to_lower_ascii(char c)
Return the lower case of a given char, ascii-only.
Definition: string.cpp:281
std::string to_lower(std::string const &str)
Return an all-lowercase copy of the given string, locale-aware.
Definition: string.cpp:249
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:200
std::string escape(std::string const &text)
Return a string where special chars are replaces by their escape sequence.
Definition: string.cpp:323
std::string tail(std::string const &text, size_t lines)
Return the last lines of the text.
Definition: string.cpp:90
std::string indent(std::string const &text, std::string const &indentation)
Indent each line of text with indentation and return the result.
Definition: string.cpp:158
void to_lower_ascii_inplace(std::string &str)
Turn the given string to all-lowercase, ascii-only.
Definition: string.cpp:300
bool equals_ci(std::string const &lhs, std::string const &rhs)
Compare two strings case insensitive.
Definition: string.cpp:47
std::string to_string_precise(double const value, int const precision)
Return a precise string representation of the input value, using the provided precision value (determ...
Definition: string.cpp:398
std::string to_string_leading_zeros(size_t value, size_t length)
Return a string representation of a size_t value with a fixed length, that is, by adding leading zero...
Definition: string.cpp:391
std::string trim_left(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with left trimmed white spaces.
Definition: string.cpp:226
Provides some commonly used string utility functions.
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:69
void to_lower_inplace(std::string &str)
Turn the given string to all-lowercase, locale-aware.
Definition: string.cpp:258
char to_upper_ascii(char c)
Return the upper case of a given char, ascii-only.
Definition: string.cpp:286
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
void to_upper_inplace(std::string &str)
Turn the given string to all-uppercase, locale-aware.
Definition: string.cpp:274