A library for working with phylogenetic and population genetic data.
v0.27.0
string.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
34 
35 #include <algorithm>
36 #include <cassert>
37 #include <cctype>
38 #include <cstdio>
39 #include <iomanip>
40 #include <iostream>
41 #include <limits>
42 #include <sstream>
43 #include <stdexcept>
44 
45 #ifdef GENESIS_AVX
46  #include <immintrin.h>
47 #endif
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // Compare
54 // =================================================================================================
55 
56 bool contains_ci( std::vector<std::string> const& haystack, std::string const& needle )
57 {
58  auto const l_needle = to_lower( needle );
59  for( auto const& val : haystack ) {
60  if( to_lower( val ) == l_needle ) {
61  return true;
62  }
63  }
64  return false;
65 }
66 
/**
 * @brief Compare two strings for equality, ignoring letter case.
 *
 * Strings of different length are never equal. Otherwise, the strings are compared char by
 * char after converting each char with std::tolower().
 *
 * @param lhs First string.
 * @param rhs Second string.
 * @return true iff both strings have the same length and all chars match case-insensitively.
 */
bool equals_ci( std::string const& lhs, std::string const& rhs)
{
    if( lhs.size() != rhs.size() ) {
        return false;
    }

    // std::tolower() takes an int that has to be representable as unsigned char (or be EOF).
    // A plain char may be negative for bytes >= 0x80, which would be undefined behaviour,
    // hence the explicit cast via unsigned char before the call.
    auto const same_ci = []( char l, char r ){
        return std::tolower( static_cast<unsigned char>( l ))
            == std::tolower( static_cast<unsigned char>( r ));
    };
    return std::equal( lhs.begin(), lhs.end(), rhs.begin(), same_ci );
}
80 
/**
 * @brief Return whether @p text begins with @p start.
 *
 * A prefix that is longer than the text can never match; an empty prefix always matches.
 *
 * @param text  String to inspect.
 * @param start Prefix to look for.
 * @return true iff the first start.size() chars of text are equal to start.
 */
bool starts_with( std::string const & text, std::string const & start )
{
    return start.size() <= text.size()
        && text.compare( 0, start.size(), start ) == 0;
}
88 
/**
 * @brief Return whether @p text ends with @p ending.
 *
 * A suffix that is longer than the text can never match; an empty suffix always matches.
 *
 * @param text   String to inspect.
 * @param ending Suffix to look for.
 * @return true iff the last ending.size() chars of text are equal to ending.
 */
bool ends_with( std::string const & text, std::string const & ending )
{
    if( ending.size() > text.size() ) {
        return false;
    }
    auto const tail_start = text.size() - ending.size();
    return text.compare( tail_start, ending.size(), ending ) == 0;
}
96 
/**
 * @brief Return whether @p str is matched by the wildcard @p pattern.
 *
 * In the pattern, '?' stands for exactly one arbitrary char, and '*' for any (possibly empty)
 * sequence of chars. All other chars have to match literally. The whole string has to be
 * matched by the whole pattern.
 *
 * Implemented as bottom-up dynamic programming over all (string prefix, pattern prefix) pairs,
 * adapted from https://www.geeksforgeeks.org/wildcard-pattern-matching/
 *
 * @param str     String to test.
 * @param pattern Wildcard pattern.
 * @return true iff the pattern matches the complete string.
 */
bool match_wildcards( std::string const& str, std::string const& pattern )
{
    // An empty pattern matches the empty string only.
    if( pattern.empty() ) {
        return str.empty();
    }

    // Table of sub-problem results, stored row-major in a flat vector:
    // cell( r, c ) tells whether the first r chars of str match the first c chars of pattern.
    size_t const rows = str.size() + 1;
    size_t const cols = pattern.size() + 1;
    std::vector<bool> table( rows * cols, false );
    auto cell = [&table, cols]( size_t r, size_t c ) -> std::vector<bool>::reference {
        return table[ r * cols + c ];
    };

    // Empty string versus empty pattern prefix.
    cell( 0, 0 ) = true;

    // The empty string is matched by a pattern prefix only as long as that consists of '*'.
    for( size_t c = 1; c < cols; ++c ) {
        if( pattern[ c - 1 ] == '*' ) {
            bool const previous = cell( 0, c - 1 );
            cell( 0, c ) = previous;
        }
    }

    // Fill the remaining rows.
    for( size_t r = 1; r < rows; ++r ) {
        for( size_t c = 1; c < cols; ++c ) {
            char const pat = pattern[ c - 1 ];

            if( pat == '*' ) {
                // Either the '*' stands for the empty sequence (drop it from the pattern),
                // or it swallows the current char of the string.
                bool const star_empty   = cell( r, c - 1 );
                bool const star_consume = cell( r - 1, c );
                cell( r, c ) = star_empty || star_consume;
            } else {
                // A '?' or an identical char consumes one char on both sides;
                // anything else is a mismatch.
                bool const chars_match = ( pat == '?' ) || ( str[ r - 1 ] == pat );
                bool const diagonal    = cell( r - 1, c - 1 );
                cell( r, c ) = chars_match && diagonal;
            }
        }
    }

    return cell( str.size(), pattern.size() );
}
151 
152 int compare_natural( std::string const& lhs, std::string const& rhs )
153 {
154  // Implementation inspired by http://www.davekoelle.com/files/alphanum.hpp
155  // Released under the MIT License - https://opensource.org/licenses/MIT
156  // We however heavily modified it, in particular to work with arbitrary runs of digits.
157 
158  // Edge cases of empty strings.
159  if( lhs.empty() || rhs.empty() ) {
160  // Smart! Let's avoid to do all three cases, and instead convert to int (0 or 1):
161  // * lhs empty, but rhs not: 0 - 1 = -1
162  // * rhs empty, but lhs not: 1 - 0 = +1
163  // * both empty: 1 - 1 = 0
164  return static_cast<int>( rhs.empty() ) - static_cast<int>( lhs.empty() );
165  }
166 
167  // We need to switch between modes. Clear semantics instead of just a bool.
168  enum class ParseMode
169  {
170  kString,
171  kNumber
172  };
173  auto mode = ParseMode::kString;
174 
175  // Helper function to parse a string into an unsigned number, quickly.
176  // Advances the given pos while parsing, until either the end of the string or no more digits.
177  // --> not used, as it can only handle numbers up to the size of unsigned long...
178  // auto parse_unsigned_number_ = []( std::string const& str, size_t& pos ){
179  // unsigned long num = 0;
180  // while( pos < str.size() && is_digit( str[pos] )) {
181  // auto dig = str[pos] - '0';
182  // if( num > ( std::numeric_limits<T>::max() - dig ) / 10 ) {
183  // // This is ugly, and a proper solution would be to take string lengths into
184  // // account, but that would probably require to fully load them, and then compare...
185  // throw std::overflow_error( "Numerical overflow in compare_natural()" );
186  // }
187  // num = 10 * num + dig;
188  // ++pos;
189  // }
190  // return num;
191  // };
192 
193  // Iterate positions in the strings.
194  size_t l = 0;
195  size_t r = 0;
196  while( l < lhs.size() && r < rhs.size() ) {
197  if( mode == ParseMode::kString ) {
198 
199  // Iterate as long as there are strings/chars in both.
200  while( l < lhs.size() && r < rhs.size() ) {
201 
202  // Check if these are digits.
203  bool const l_digit = is_digit( lhs[l] );
204  bool const r_digit = is_digit( rhs[r] );
205 
206  // If both are digits, we continue in number mode.
207  if( l_digit && r_digit ) {
208  mode = ParseMode::kNumber;
209  break;
210  }
211 
212  // If only one of them is a digit, we have a result.
213  if( l_digit ) {
214  return -1;
215  }
216  if( r_digit ) {
217  return +1;
218  }
219 
220  // Neither is a digit, so compare as ASCII chars; if they differ, we have a result.
221  assert( ! l_digit && ! r_digit );
222  int const diff = static_cast<int>( lhs[l] ) - static_cast<int>( rhs[r] );
223  if( diff != 0 ) {
224  return diff;
225  }
226 
227  // Otherwise, process the next character.
228  ++l;
229  ++r;
230  }
231 
232  } else {
233  assert( mode == ParseMode::kNumber );
234 
235  // Here, a first idea was to parse both strings as numbers for as long as they contain
236  // digits, and then compare the resulting numbers. However, this overflows for larger
237  // numbers, and we can easily avoid that by an equally simple solution. We might need
238  // to iterate the digits twice, but save the effort of actually building the numbers!
239  // (see above parse_unsigned_number_() for the parsing function that we first had)
240 
241  // Parse the strings as long as they contain digits, advancing helper indices here.
242  size_t ld = l;
243  size_t rd = r;
244  while( ld < lhs.size() && is_digit( lhs[ld] )) {
245  ++ld;
246  }
247  while( rd < rhs.size() && is_digit( rhs[rd] )) {
248  ++rd;
249  }
250 
251  // If the lengths of digit runs differ, one of them is a larger number than the other.
252  // In that case, we have a result.
253  if( ld != rd ) {
254  return static_cast<int>( ld ) - static_cast<int>( rd );
255  }
256 
257  // If those numbers are the same length, we need to iterate again,
258  // and check digit by digit. Iterate as long as there are digits in both.
259  while( l < lhs.size() && r < rhs.size() ) {
260 
261  // Check if these are digits.
262  bool const l_digit = is_digit( lhs[l] );
263  bool const r_digit = is_digit( rhs[r] );
264 
265  // If there are no more digits, we continue in string mode.
266  if( ! l_digit || ! r_digit ) {
267  // In that case, both have to be not digits, as we just checked same length
268  // of the digit run, and both have to be the same as our previous iteration.
269  assert( ! l_digit && ! r_digit );
270  assert( ld == rd && l == ld && r == rd );
271  mode = ParseMode::kString;
272  break;
273  }
274 
275  // Compare the digits as ASCII chars; if they differ, we have a result.
276  assert( l_digit && r_digit );
277  int const diff = static_cast<int>( lhs[l] ) - static_cast<int>( rhs[r] );
278  if( diff != 0 ) {
279  return diff;
280  }
281 
282  // Otherwise, process the next character.
283  ++l;
284  ++r;
285  }
286  }
287  }
288 
289  // Lastly, if we are here, both strings are identical up to the point to which the were compared.
290  // So now, remaining lenghts checks. Only if everything is identical, return 0.
291  if( l < lhs.size() ) {
292  assert( r == rhs.size() );
293  return +1;
294  }
295  if( r < rhs.size() ) {
296  assert( l == lhs.size() );
297  return -1;
298  }
299  assert( l == lhs.size() && r == rhs.size() );
300  return 0;
301 }
302 
303 // =================================================================================================
304 // Substrings
305 // =================================================================================================
306 
307 std::string head( std::string const& text, size_t lines )
308 {
309  // Not totally efficient, but works for now.
310  auto vec = split( text, "\n", false );
311  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
312  vec.erase( vec.end() - remove, vec.end() );
313  return join( vec, "\n" );
314 }
315 
316 std::string tail( std::string const& text, size_t lines )
317 {
318  // Not totally efficient, but works for now.
319  auto vec = split( text, "\n", false );
320  size_t remove = vec.size() > lines ? vec.size() - lines : 0;
321  vec.erase( vec.begin(), vec.begin() + remove );
322  return join( vec, "\n" );
323 }
324 
325 // =================================================================================================
326 // Find and Count
327 // =================================================================================================
328 
/**
 * @brief Count how often @p sub occurs in @p str, including overlapping occurrences.
 *
 * After each hit, the search continues one char behind the start of that hit, which is why
 * overlapping occurrences are counted as well. An empty @p sub yields 0.
 *
 * @param str String to search in.
 * @param sub Substring to search for.
 * @return Number of positions in str at which sub starts.
 */
size_t count_substring_occurrences( std::string const& str, std::string const& sub )
{
    if( sub.empty() ) {
        return 0;
    }

    size_t hits = 0;
    size_t at = str.find( sub );
    while( at != std::string::npos ) {
        ++hits;
        at = str.find( sub, at + 1 );
    }
    return hits;
}
346 
/**
 * @brief Local function that does the work for the split functions.
 *
 * Repeatedly asks @p find_pos for the next delimiter position, starting the search at the end
 * of the previous delimiter, and collects the parts in between.
 *
 * @param string     String to split.
 * @param find_pos   Function that, given the string and a start position, returns the position
 *                   of the next delimiter at or after that start, or std::string::npos if
 *                   there is none.
 * @param advance_by Width of a delimiter, that is, the number of chars to skip after each
 *                   position returned by @p find_pos. If this is 0, the search could never
 *                   make progress, so that the string is treated as containing no delimiter
 *                   at all, and is returned unsplit.
 * @param trim_empty If set, empty parts are left out of the result.
 * @return The parts of the string, in order.
 */
static std::vector<std::string> split_ (
    std::string const& string,
    std::function<size_t ( std::string const&, size_t )> find_pos,
    size_t advance_by,
    const bool trim_empty
) {
    std::vector<std::string> result;

    // Store the part [first, last) of the string, unless it is empty and those are not wanted.
    auto add_part = [&]( size_t first, size_t last ){
        if( first != last || !trim_empty ) {
            result.push_back( std::string( string.data() + first, last - first ));
        }
    };

    size_t last_pos = 0;
    while( true ) {
        // Find the next delimiter. With a delimiter width of zero, the next search would start
        // at the very position that was just found, and find it again, forever. Hence,
        // we then do not search at all, and behave as if there was no delimiter.
        size_t const pos = ( advance_by == 0 )
            ? std::string::npos
            : find_pos( string, last_pos )
        ;

        // If not found, push back the rest and stop.
        if( pos == std::string::npos ) {
            add_part( last_pos, string.length() );
            break;
        }

        // If found, push back the part before the delimiter, and continue behind it.
        add_part( last_pos, pos );
        last_pos = pos + advance_by;
    }

    return result;
}
385 
386 std::vector<std::string> split (
387  std::string const& str,
388  std::string const& delimiters,
389  const bool trim_empty
390 ) {
391  return split_(
392  str,
393  [&]( std::string const& str, size_t last_pos ){
394  return str.find_first_of( delimiters, last_pos );
395  },
396  1,
397  trim_empty
398  );
399 }
400 
401 std::vector<std::string> split (
402  std::string const& str,
403  std::function<bool(char)> delimiter_predicate,
404  const bool trim_empty
405 ) {
406  return split_(
407  str,
408  [&]( std::string const& str, size_t last_pos ){
409  // Find first matching char.
410  size_t pos = std::string::npos;
411  for( size_t i = last_pos; i < str.size(); ++i ) {
412  if( delimiter_predicate( str[i] ) ) {
413  pos = i;
414  break;
415  }
416  }
417  return pos;
418  },
419  1,
420  trim_empty
421  );
422 }
423 
424 std::vector<std::string> split_at (
425  std::string const& str,
426  std::string const& delimiter,
427  const bool trim_empty
428 ) {
429  return split_(
430  str,
431  [&]( std::string const& str, size_t last_pos ){
432  return str.find( delimiter, last_pos );
433  },
434  delimiter.size(),
435  trim_empty
436  );
437 }
438 
439 std::vector<size_t> split_range_list( std::string const& str )
440 {
441  std::vector<size_t> result;
442 
443  auto is_digits = []( std::string const& s ){
444  return trim( s ).find_first_not_of( "0123456789" ) == std::string::npos;
445  };
446 
447  auto get_number = []( std::string const& s ){
448  size_t n;
449  sscanf( trim( s ).c_str(), "%zu", &n );
450  return n;
451  };
452 
453  if( trim( str ).empty() ) {
454  return result;
455  }
456 
457  auto const lst = split( str, "," );
458  for( auto const& le : lst ) {
459  // if just digits, done. if not, split -, repeat.
460  if( is_digits( le ) ) {
461  result.push_back( get_number( le ));
462  } else {
463  auto const rng = split( le, "-" );
464  if( rng.size() != 2 || ! is_digits( rng[0] ) || ! is_digits( rng[1] ) ) {
465  throw std::runtime_error( "Invalid range list string." );
466  }
467  auto const b = get_number( rng[0] );
468  auto const e = get_number( rng[1] );
469  for( size_t i = b; i <= e; ++i ) {
470  result.push_back( i );
471  }
472  }
473  }
474 
475  std::sort( result.begin(), result.end() );
476  return result;
477 }
478 
479 // =================================================================================================
480 // Manipulate
481 // =================================================================================================
482 
483 std::string wrap(
484  std::string const& text,
485  size_t line_length
486 ) {
487  /*
488  Code is adapted from https://www.rosettacode.org/wiki/Word_wrap#C.2B.2B,
489  which is published under the GNU Free Documentation License 1.2,
490  see https://www.gnu.org/licenses/old-licenses/fdl-1.2.html
491  We fixed the handling of overly long words,
492  and added correct handling of new lines in the text.
493  It is totally inefficient, but we only need this function for small texts anyway,
494  so for now, this is good enough.
495  */
496 
497  std::ostringstream output;
498  auto const lines = split( text, "\n", false );
499  for( auto const& line : lines ) {
500  std::istringstream text_stream( line );
501  std::string word;
502 
503  if( text_stream >> word ) {
504  output << word;
505  long space_left = static_cast<long>( line_length ) - static_cast<long>( word.length() );
506  while( text_stream >> word ) {
507  if( space_left < static_cast<long>( word.length() + 1 )) {
508  output << "\n" << word;
509  space_left = line_length - word.length();
510  } else {
511  output << " " << word;
512  space_left -= word.length() + 1;
513  }
514  }
515  }
516  output << "\n";
517  }
518 
519  return output.str();
520 }
521 
522 std::string indent(
523  std::string const& text,
524  std::string const& indentation
525 ) {
526  auto ret = indentation + replace_all( text, "\n", "\n" + indentation );
527  return trim_right( ret, indentation );
528 }
529 
/**
 * @brief Return a copy of a string, where all occurrences of a search string
 * are replaced by a replace string.
 *
 * The text is scanned once from left to right. Occurrences do not overlap, and text that was
 * put in as a replacement is not searched again.
 *
 * @param text    Text to work on.
 * @param search  String to look for. If this is empty, there is nothing to replace,
 *                and the text is returned unchanged.
 * @param replace String to put in place of each occurrence of @p search.
 * @return The text with all replacements applied.
 */
std::string replace_all (
    std::string const& text, std::string const& search, std::string const& replace
) {
    // An empty search string is found at every position, without consuming any char.
    // Replacing it would hence never terminate.
    if( search.empty() ) {
        return text;
    }

    // Build the result in one pass, instead of repeatedly erasing and inserting in place,
    // which would move the remainder of the string around for every single occurrence.
    std::string result;
    result.reserve( text.size() );

    size_t copied = 0;
    for(
        size_t hit = text.find( search );
        hit != std::string::npos;
        hit = text.find( search, copied )
    ) {
        // Copy what is between the previous occurrence and this one, then the replacement.
        result.append( text, copied, hit - copied );
        result += replace;
        copied = hit + search.size();
    }

    // Copy what is left behind the last occurrence.
    result.append( text, copied, std::string::npos );
    return result;
}
546 
547 // inline version
548 /*
549 void replace_all(
550  std::string &s, const std::string &search, const std::string &replace
551 ) {
552  for (size_t pos = 0; ; pos += replace.length() ) {
553  pos = s.find(search, pos);
554 
555  if (pos == string::npos)
556  break;
557 
558  s.erase(pos, search.length());
559  s.insert(pos, replace);
560  }
561 }
562 */
563 
/**
 * @brief Replace all occurrences of the @p search_chars in @p text by the @p replace char.
 *
 * @param text         Text to work on.
 * @param search_chars Set of chars that are to be replaced.
 * @param replace      Char to put in place of each of them.
 * @return A copy of the text of the same length, with the replacements applied.
 */
std::string replace_all_chars (
    std::string const& text,
    std::string const& search_chars,
    char replace
) {
    std::string result( text );
    auto const is_searched = [&search_chars]( char c ){
        return search_chars.find( c ) != std::string::npos;
    };
    std::replace_if( result.begin(), result.end(), is_searched, replace );
    return result;
}
577 
/**
 * @brief Return a copy of the input string, with trailing chars trimmed.
 *
 * All chars at the end of the string that are contained in @p delimiters are removed.
 *
 * @param s          String to trim.
 * @param delimiters Set of chars to trim off.
 * @return The trimmed string, which is empty if the input consists of delimiters only.
 */
std::string trim_right (
    std::string const& s,
    std::string const& delimiters
) {
    auto const last_kept = s.find_last_not_of( delimiters );
    return ( last_kept == std::string::npos )
        ? std::string()
        : s.substr( 0, last_kept + 1 )
    ;
}
589 
/**
 * @brief Return a copy of the input string, with leading chars trimmed.
 *
 * All chars at the beginning of the string that are contained in @p delimiters are removed.
 *
 * @param s          String to trim.
 * @param delimiters Set of chars to trim off.
 * @return The trimmed string, which is empty if the input consists of delimiters only.
 */
std::string trim_left (
    std::string const& s,
    std::string const& delimiters
) {
    auto const first_kept = s.find_first_not_of( delimiters );
    return ( first_kept == std::string::npos )
        ? std::string()
        : s.substr( first_kept )
    ;
}
601 
602 std::string trim (
603  std::string const& s,
604  std::string const& delimiters
605 ) {
606  return trim_left(trim_right(s, delimiters), delimiters);
607 }
608 
609 // =================================================================================================
610 // Case Conversion
611 // =================================================================================================
612 
613 #ifdef GENESIS_AVX
614 
/**
 * @brief Toggle the ASCII case bit of all chars of @p str that are in the given range.
 *
 * Every char `c` with `char_a <= c <= char_z` gets its bit 0x20 flipped, which is the bit
 * that distinguishes upper and lower case ASCII letters. All other chars are left as they are.
 * Hence, with the range 'A' to 'Z', this turns a string to lower case, and with 'a' to 'z',
 * to upper case.
 *
 * The bulk of the string is processed with AVX2, that is, 32 chars at a time; the remaining
 * chars at the end of the string are processed one by one.
 *
 * @param str    String to change in place.
 * @param char_a First char of the range of chars to toggle.
 * @param char_z Last char of the range of chars to toggle.
 */
inline void toggle_case_ascii_inplace_avx_( std::string& str, char char_a, char char_z )
{
    // An AVX2 register holds 256 bit = 32 byte, which is our stride through the string.
    // Not to be confused with the ASCII case bit, which happens to have the value 32 (0x20).
    constexpr size_t stride = 32;
    size_t const vectorized_end = str.size() / stride * stride;

    // Registers filled with 32 copies of the case bit, and of the two range boundaries.
    auto const case_bit    = _mm256_set1_epi8( 0x20 );
    auto const range_first = _mm256_set1_epi8( char_a );
    auto const range_last  = _mm256_set1_epi8( char_z );

    // Process all complete chunks of 32 chars.
    for( size_t i = 0; i < vectorized_end; i += stride ) {
        auto* const chunk = reinterpret_cast<__m256i*>( &str[i] );
        auto const data = _mm256_loadu_si256( chunk );

        // 0xff for each char that is outside of the range, 0x00 for each one inside.
        auto const below   = _mm256_cmpgt_epi8( range_first, data );
        auto const above   = _mm256_cmpgt_epi8( data, range_last );
        auto const outside = _mm256_or_si256( below, above );

        // The case bit for each char inside of the range, and nothing for all others,
        // which is then flipped in the data.
        auto const flip = _mm256_andnot_si256( outside, case_bit );
        _mm256_storeu_si256( chunk, _mm256_xor_si256( flip, data ));
    }

    // Process the rest that did not fill a complete chunk.
    for( size_t i = vectorized_end; i < str.size(); ++i ) {
        if( char_a <= str[i] && str[i] <= char_z ){
            str[i] ^= 0x20;
        }
    }
}
648 
649 #endif // GENESIS_AVX
650 
651 void to_lower_ascii_inplace( std::string& str )
652 {
653  #ifdef GENESIS_AVX
654 
655  // Toggle the ascii case bit for all values between the two mask boundaries.
656  toggle_case_ascii_inplace_avx_( str, 'A', 'Z' );
657 
658  #else // GENESIS_AVX
659 
660  // Naive implementation that might use compiler-generated vector intrinsics.
661  for( auto& c : str ){
662  c = to_lower(c);
663  }
664 
665  #endif // GENESIS_AVX
666 }
667 
668 std::string to_lower_ascii( std::string const& str )
669 {
670  auto res = str;
671  to_lower_ascii_inplace( res );
672  return res;
673 }
674 
675 void to_upper_ascii_inplace( std::string& str )
676 {
677  #ifdef GENESIS_AVX
678 
679  // Toggle the ascii case bit for all values between the two mask boundaries.
680  toggle_case_ascii_inplace_avx_( str, 'a', 'z' );
681 
682  #else // GENESIS_AVX
683 
684  // Naive implementation that might use compiler-generated vector intrinsics.
685  for( auto& c : str ){
686  c = to_upper(c);
687  }
688 
689  #endif // GENESIS_AVX
690 }
691 
692 std::string to_upper_ascii( std::string const& str )
693 {
694  auto res = str;
695  to_upper_ascii_inplace( res );
696  return res;
697 }
698 
699 // =================================================================================================
700 // Normalize
701 // =================================================================================================
702 
/**
 * @brief Return a string where special chars are replaced by their escape sequence.
 *
 * The following chars are replaced by a backslash followed by a second char:
 * carriage return (`\r`), new line (`\n`), tab (`\t`), double quotation mark (`\"`),
 * and the backslash itself (`\\`). All other chars are copied as they are.
 *
 * Each char of the input is escaped exactly once. In particular, the backslashes that are
 * introduced by the escaping are not escaped again, so that deescape() yields the original
 * text when given the result of this function.
 *
 * @param text Text to escape.
 * @return The escaped text.
 */
std::string escape( std::string const& text )
{
    // Single pass over the text. Doing one replacement after another over the whole text
    // instead would need care to handle the backslash first: handled last, it would double
    // the backslashes of all escape sequences that were created before.
    std::string result;
    result.reserve( text.size() );
    for( char const c : text ) {
        switch( c ) {
            case '\r':
                result += "\\r";
                break;

            case '\n':
                result += "\\n";
                break;

            case '\t':
                result += "\\t";
                break;

            case '"':
                result += "\\\"";
                break;

            case '\\':
                result += "\\\\";
                break;

            default:
                result += c;
        }
    }
    return result;
}
714 
715 std::string deescape( std::string const& text )
716 {
717  // Prepare a string that might be a bit too big, but saves reallocation.
718  std::string tmp;
719  tmp.reserve( text.size() );
720 
721  // Copy from text to tmp string, while deescaping.
722  for( size_t i = 0; i < text.size(); ++i ) {
723  if( text[ i ] == '\\' ) {
724  if( i + 1 >= text.size() ){
725  break;
726  }
727 
728  tmp += deescape( text[ i + 1 ] );
729  ++i;
730  } else {
731  tmp += text[ i ];
732  }
733  }
734  return tmp;
735 }
736 
/**
 * @brief Return the char that is represented by an escape sequence, given the char that
 * follows the backslash.
 *
 * The chars 'r', 'n', and 't' are turned into carriage return, new line, and tab,
 * respectively. Every other char stands for itself.
 *
 * @param c Char that follows the backslash of an escape sequence.
 * @return The char that the escape sequence stands for.
 */
char deescape( char c )
{
    if( c == 'r' ) {
        return '\r';
    }
    if( c == 'n' ) {
        return '\n';
    }
    if( c == 't' ) {
        return '\t';
    }
    return c;
}
753 
754 // =================================================================================================
755 // Output
756 // =================================================================================================
757 
/**
 * @brief Take a string and repeat it a given number of times.
 *
 * @param word  String to repeat.
 * @param times Number of repetitions.
 * @return The concatenation of @p times copies of @p word, which is empty for zero times.
 */
std::string repeat( std::string const& word, size_t times )
{
    // The final length is known in advance, so get the memory in one go.
    std::string result;
    result.reserve( times * word.length() );

    for( size_t remaining = times; remaining > 0; --remaining ) {
        result.append( word );
    }
    return result;
}
770 
/**
 * @brief Return a string representation of a `size_t` value with a fixed length,
 * that is, by adding leading zeros.
 *
 * If the value needs more digits than @p length, the result is longer than that;
 * it is never truncated.
 *
 * @param value  Value to print.
 * @param length Minimum length of the result.
 * @return The value as decimal number, left-padded with '0' chars up to the given length.
 */
std::string to_string_leading_zeros( size_t value, size_t length )
{
    std::ostringstream out;
    out.fill( '0' );
    out << std::setw( length ) << value;
    return out.str();
}
777 
/**
 * @brief Return a string representation of the input value in fixed notation,
 * with exactly @p precision digits after the decimal point.
 *
 * @param value     Value to print.
 * @param precision Number of digits after the decimal point.
 * @return The formatted value, including trailing zeros.
 */
std::string to_string_precise( double const value, int const precision )
{
    std::ostringstream out;
    out.setf( std::ios::fixed, std::ios::floatfield );
    out.precision( precision );
    out << value;
    return out.str();
}
785 
/**
 * @brief Return a string representation of the input value, rounded to at most @p precision
 * digits after the decimal point, and without trailing zeros.
 *
 * The value is first printed in fixed notation with @p precision digits after the decimal
 * point. Then, trailing zeros after the decimal point are removed; if no digits are left
 * after the decimal point, the decimal point is removed as well.
 *
 * Zeros that are part of the integer part are significant, and are never removed: a value
 * of 100 with a precision of 0 yields "100".
 *
 * @param value     Value to print.
 * @param precision Maximum number of digits after the decimal point.
 * @return The formatted value.
 */
std::string to_string_rounded( double const value, int const precision )
{
    // Get fixed precision string.
    std::ostringstream s;
    s << std::fixed << std::setprecision( precision ) << value;
    auto str = s.str();

    // Without a decimal point, there are no decimal zeros that could be truncated. This is
    // the case for a precision of 0, where all digits, including zeros at the end, belong to
    // the integer part, and for the text representations of non-finite values.
    auto const point = str.find( '.' );
    if( point == std::string::npos ) {
        return str;
    }

    // Truncate trailing zeros, unless there are only zeros after the decimal point. Then,
    // also delete the decimal point. The search cannot fail here, as the decimal point itself
    // is not a zero.
    auto const last_nonzero = str.find_last_not_of( '0' );
    assert( last_nonzero != std::string::npos && last_nonzero >= point );
    str.erase( last_nonzero == point ? point : last_nonzero + 1 );
    return str;
}
803 
804 } // namespace utils
805 } // namespace genesis
genesis::utils::deescape
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form.
Definition: string.cpp:715
genesis::utils::indent
std::string indent(std::string const &text, std::string const &indentation)
Indent each line of text with indentation and return the result.
Definition: string.cpp:522
genesis::utils::trim_right
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with white spaces trimmed at the right end.
Definition: string.cpp:578
genesis::utils::tail
std::string tail(std::string const &text, size_t lines)
Return the last lines of the text.
Definition: string.cpp:316
genesis::utils::to_upper_ascii_inplace
void to_upper_ascii_inplace(std::string &str)
Turn the given string to all-uppercase, ASCII-only, inline.
Definition: string.cpp:675
genesis::utils::equals_ci
bool equals_ci(std::string const &lhs, std::string const &rhs)
Compare two strings case insensitive.
Definition: string.cpp:67
genesis::utils::replace_all
std::string replace_all(std::string const &text, std::string const &search, std::string const &replace)
Return a copy of a string, where all occurrences of a search string are replaced by a replace string.
Definition: string.cpp:530
genesis::utils::contains_ci
bool contains_ci(std::vector< std::string > const &haystack, std::string const &needle)
Return whether a vector of strings contains a given string, case insensitive.
Definition: string.cpp:56
common.hpp
genesis::utils::to_string_rounded
std::string to_string_rounded(double const value, int const precision)
Return a string representation of the input value, using the provided precision value (determining it...
Definition: string.cpp:786
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::trim
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:602
genesis::utils::to_lower_ascii_inplace
void to_lower_ascii_inplace(std::string &str)
Turn the given string to all-lowercase, ASCII-only.
Definition: string.cpp:651
genesis::utils::replace_all_chars
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:564
genesis::utils::to_string_leading_zeros
std::string to_string_leading_zeros(size_t value, size_t length)
Return a string representation of a size_t value with a fixed length, that is, by adding leading zero...
Definition: string.cpp:771
genesis::utils::offset
void offset(Histogram &h, double value)
Definition: operations.cpp:47
genesis::utils::to_string_precise
std::string to_string_precise(double const value, int const precision)
Return a precise string representation of the input value, using the provided precision value (determ...
Definition: string.cpp:778
genesis::tree::equal
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compare two trees for equality given binary comparator functionals for their nodes and edges.
Definition: tree/function/operators.cpp:81
genesis::utils::to_upper
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
Definition: char.hpp:230
string.hpp
Provides some commonly used string utility functions.
genesis::utils::head
std::string head(std::string const &text, size_t lines)
Return the first lines of the text.
Definition: string.cpp:307
genesis::utils::starts_with
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:81
genesis::utils::compare_natural
int compare_natural(std::string const &lhs, std::string const &rhs)
Compare two strings with natural human sorting, that is "A1", "A2", "A100", instead of the standard s...
Definition: string.cpp:152
genesis::utils::to_upper_ascii
std::string to_upper_ascii(std::string const &str)
Return an all-uppercase copy of the given string, ASCII-only.
Definition: string.cpp:692
genesis::utils::join
Interval< DataType, NumericalType, IntervalKind > join(Interval< DataType, NumericalType, IntervalKind > const &a, Interval< DataType, NumericalType, IntervalKind > const &b)
Creates a new Interval that contains both intervals and whatever is between.
Definition: utils/containers/interval_tree/functions.hpp:127
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::ends_with
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:89
genesis::utils::is_digit
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
genesis::utils::to_lower_ascii
std::string to_lower_ascii(std::string const &str)
Return an all-lowercase copy of the given string, ASCII-only.
Definition: string.cpp:668
genesis::utils::repeat
std::string repeat(std::string const &word, size_t times)
Take a string and repeat it a given number of times.
Definition: string.cpp:758
genesis::utils::wrap
std::string wrap(std::string const &text, size_t line_length)
Wrap a text at a given line_length.
Definition: string.cpp:483
genesis::utils::split_at
std::vector< std::string > split_at(std::string const &str, std::string const &delimiter, const bool trim_empty)
Spilt a string into parts, given a delimiter string.
Definition: string.cpp:424
genesis::utils::to_lower
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
Definition: char.hpp:221
genesis::utils::match_wildcards
bool match_wildcards(std::string const &str, std::string const &pattern)
Return whether a string is matched by a wildcard pattern containing ? and * for single and multiple (...
Definition: string.cpp:97
genesis::utils::split_
static std::vector< std::string > split_(std::string const &string, std::function< size_t(std::string const &, size_t)> find_pos, size_t advance_by, const bool trim_empty)
Local function that does the work for the split functions.
Definition: string.cpp:350
genesis::utils::trim_left
std::string trim_left(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with white spaces trimmed at the left end.
Definition: string.cpp:590
genesis::utils::count_substring_occurrences
size_t count_substring_occurrences(std::string const &str, std::string const &sub)
Return the number of (possibly overlapping) occurrences of a substring in a string.
Definition: string.cpp:329
genesis::utils::split_range_list
std::vector< size_t > split_range_list(std::string const &str)
Split a string containing positive integer numbers into its parts and resolve ranges.
Definition: string.cpp:439
genesis::utils::split
std::vector< std::string > split(std::string const &str, std::string const &delimiters, const bool trim_empty)
Split a string into parts, given a set of delimiter chars.
Definition: string.cpp:386
genesis::utils::escape
std::string escape(std::string const &text)
Return a string where special chars are replaced by their escape sequence.
Definition: string.cpp:703