A library for working with phylogenetic and population genetic data.
v0.27.0
string.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_TEXT_STRING_H_
2 #define GENESIS_UTILS_TEXT_STRING_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
35 
36 #include <algorithm>
37 #include <cctype>
38 #include <functional>
39 #include <iostream>
#include <iterator>
40 #include <sstream>
41 #include <string>
#include <type_traits>
42 #include <vector>
43 
44 namespace genesis {
45 namespace utils {
46 
47 // =================================================================================================
48 // Compare
49 // =================================================================================================
50 
/**
 * @brief Return whether a vector of strings contains a given string, case insensitive.
 */
54 bool contains_ci( std::vector<std::string> const& haystack, std::string const& needle );
55 
/**
 * @brief Compare two strings case insensitive.
 */
59 bool equals_ci( std::string const& lhs, std::string const& rhs );
60 
/**
 * @brief Return whether a string starts with another string.
 */
64 bool starts_with( std::string const & text, std::string const & start );
65 
/**
 * @brief Return whether a string ends with another string.
 */
69 bool ends_with( std::string const & text, std::string const & ending );
70 
/**
 * @brief Return whether a string is matched by a wildcard pattern containing `?` and `*`
 * for single and multiple wildcard matches.
 *
 * NOTE(review): exact matching rules are implemented in string.cpp; confirm there.
 */
75 bool match_wildcards( std::string const& str, std::string const& pattern );
76 
/**
 * @brief Compare two strings with natural human sorting, that is "A1", "A2", "A100",
 * instead of plain character-wise ordering.
 *
 * NaturalLess treats a negative return value as "lhs sorts before rhs".
 * Presumably zero means equal and a positive value means "lhs sorts after rhs";
 * confirm against the implementation in string.cpp.
 */
83 int compare_natural( std::string const& lhs, std::string const& rhs );
84 
/**
 * @brief Functor class to compare two strings with natural "human" sorting,
 * see compare_natural().
 *
 * Can be used as the comparator of ordered containers and of std::sort().
 * The call operator returns true iff compare_natural() yields a negative value
 * for the pair ( lhs, rhs ).
 */
template <class T = std::string>
struct NaturalLess {
    // These member types used to be inherited from std::binary_function. That base class was
    // deprecated in C++11 and removed in C++17, so we provide the types directly instead,
    // which keeps code that relies on them working.
    using first_argument_type  = T;
    using second_argument_type = T;
    using result_type          = bool;

    bool operator()( T const& lhs, T const& rhs ) const {
        return compare_natural( lhs, rhs ) < 0;
    }
};
94 
99 template <typename RandomAccessIterator>
100 inline void sort_natural(
101  RandomAccessIterator first,
102  RandomAccessIterator last
103 ) {
104  using T = typename RandomAccessIterator::value_type;
105  std::sort( first, last, NaturalLess<T>() );
106 }
107 
108 // =================================================================================================
109 // Substrings
110 // =================================================================================================
111 
/**
 * @brief Return the first lines of the text.
 *
 * The number of lines defaults to 10.
 */
115 std::string head( std::string const& text, size_t lines = 10 );
116 
/**
 * @brief Return the last lines of the text.
 *
 * The number of lines defaults to 10.
 */
120 std::string tail( std::string const& text, size_t lines = 10 );
121 
122 // =================================================================================================
123 // Find and Count
124 // =================================================================================================
125 
/**
 * @brief Return the number of (possibly overlapping) occurrences of a substring in a string.
 */
129 size_t count_substring_occurrences( std::string const& str, std::string const& sub );
130 
/**
 * @brief Split a string into parts, given a delimiters set of chars.
 *
 * By default, a single space is the only delimiter char.
 * NOTE(review): trim_empty presumably controls whether empty parts are dropped
 * from the result; confirm in string.cpp.
 */
138 std::vector<std::string> split (
139  std::string const& str,
140  std::string const& delimiters = " ",
141  const bool trim_empty = true
142 );
143 
/**
 * @brief Split a string into parts, given a predicate on chars.
 *
 * NOTE(review): presumably the string is split at every char for which
 * delimiter_predicate returns true, with trim_empty behaving as in the overload
 * taking a set of delimiter chars; confirm in string.cpp.
 */
152 std::vector<std::string> split (
153  std::string const& str,
154  std::function<bool (char)> delimiter_predicate,
155  const bool trim_empty = true
156 );
157 
/**
 * @brief Split a string into parts, given a delimiter string.
 *
 * In contrast to split(), which takes a set of delimiter chars, the delimiter here
 * is a string. NOTE(review): presumably the whole string has to match in order to
 * split; confirm in string.cpp.
 */
165 std::vector<std::string> split_at (
166  std::string const& str,
167  std::string const& delimiter,
168  const bool trim_empty = true
169 );
170 
/**
 * @brief Split a string containing positive integer numbers into its parts and resolve ranges.
 *
 * NOTE(review): the accepted input syntax (separators, range notation) is defined
 * by the implementation in string.cpp; confirm there.
 */
177 std::vector<size_t> split_range_list( std::string const& str );
178 
179 // =================================================================================================
180 // Manipulate
181 // =================================================================================================
182 
/**
 * @brief Wrap a text at a given line_length.
 *
 * The line length defaults to 80.
 */
186 std::string wrap(
187  std::string const& text,
188  size_t line_length = 80
189 );
190 
/**
 * @brief Indent each line of text with indentation and return the result.
 */
198 std::string indent(
199  std::string const& text,
200  std::string const& indentation = " "
201 );
202 
/**
 * @brief Return a copy of a string, where all occurrences of a search string
 * are replaced by a replace string.
 */
207 std::string replace_all (
208  std::string const& text,
209  std::string const& search,
210  std::string const& replace
211 );
212 
/**
 * @brief Replace all occurrences of the search_chars in text by the replace char.
 */
216 std::string replace_all_chars (
217  std::string const& text,
218  std::string const& search_chars,
219  char replace
220 );
221 
/**
 * @brief Return a copy of the input string, trimmed at its right end.
 *
 * By default, the chars space, \\f, \\n, \\r, \\t and \\v are used as delimiters.
 * NOTE(review): presumably all trailing chars contained in delimiters are removed;
 * confirm in string.cpp.
 */
225 std::string trim_right (
226  std::string const& s,
227  std::string const& delimiters = " \f\n\r\t\v"
228 );
229 
/**
 * @brief Return a copy of the input string, trimmed at its left end.
 *
 * By default, the chars space, \\f, \\n, \\r, \\t and \\v are used as delimiters.
 * NOTE(review): presumably all leading chars contained in delimiters are removed;
 * confirm in string.cpp.
 */
233 std::string trim_left (
234  std::string const& s,
235  std::string const& delimiters = " \f\n\r\t\v"
236 );
237 
/**
 * @brief Return a copy of the input string, with trimmed white spaces.
 *
 * By default, the chars space, \\f, \\n, \\r, \\t and \\v are used as delimiters.
 * NOTE(review): presumably this trims at both ends; confirm in string.cpp.
 */
241 std::string trim (
242  std::string const& s,
243  std::string const& delimiters = " \f\n\r\t\v"
244 );
245 
246 // =================================================================================================
247 // Case Conversion
248 // =================================================================================================
249 
/**
 * @brief Return an all-lowercase copy of the given string.
 *
 * Each char is converted via std::tolower(), which depends on the currently set C locale.
 */
inline std::string to_lower( std::string const& str )
{
    std::string lowered( str.size(), '\0' );
    std::transform( str.begin(), str.end(), lowered.begin(), []( char ch ){
        // std::tolower() has undefined behavior for negative values other than EOF,
        // hence the detour via unsigned char.
        // See https://en.cppreference.com/w/cpp/string/byte/tolower
        return static_cast<char>( std::tolower( static_cast<unsigned char>( ch )));
    });
    return lowered;
}
263 
/**
 * @brief Turn the given string to all-lowercase, locale-aware.
 *
 * The string is modified in place, using std::tolower() on each of its chars.
 */
inline void to_lower_inplace( std::string& str )
{
    std::transform( str.begin(), str.end(), str.begin(), []( char ch ){
        // Cast to unsigned char first, as required by std::tolower().
        return static_cast<char>( std::tolower( static_cast<unsigned char>( ch )));
    });
}
273 
/**
 * @brief Return an all-uppercase copy of the given string.
 *
 * Each char is converted via std::toupper(), which depends on the currently set C locale.
 */
inline std::string to_upper( std::string const& str )
{
    std::string uppered( str.size(), '\0' );
    std::transform( str.begin(), str.end(), uppered.begin(), []( char ch ){
        // Cast to unsigned char first, as required by std::toupper().
        return static_cast<char>( std::toupper( static_cast<unsigned char>( ch )));
    });
    return uppered;
}
285 
/**
 * @brief Turn the given string to all-uppercase, locale-aware.
 *
 * The string is modified in place, using std::toupper() on each of its chars.
 */
inline void to_upper_inplace( std::string& str )
{
    std::transform( str.begin(), str.end(), str.begin(), []( char ch ){
        // Cast to unsigned char first, as required by std::toupper().
        return static_cast<char>( std::toupper( static_cast<unsigned char>( ch )));
    });
}
295 
/**
 * @brief Turn the given string to all-lowercase, ASCII-only.
 *
 * The string is taken by non-const reference and the function returns nothing,
 * so the conversion presumably happens in place; the implementation is in string.cpp.
 */
299 void to_lower_ascii_inplace( std::string& str );
300 
/**
 * @brief Return an all-lowercase copy of the given string, ASCII-only.
 */
304 std::string to_lower_ascii( std::string const& str );
305 
/**
 * @brief Turn the given string to all-uppercase, ASCII-only.
 *
 * The string is taken by non-const reference and the function returns nothing,
 * so the conversion presumably happens in place; the implementation is in string.cpp.
 */
311 void to_upper_ascii_inplace( std::string& str );
312 
/**
 * @brief Return an all-uppercase copy of the given string, ASCII-only.
 */
316 std::string to_upper_ascii( std::string const& str );
317 
318 // =================================================================================================
319 // Normalize
320 // =================================================================================================
321 
/**
 * @brief Return a string where special chars are replaced by their escape sequence.
 *
 * NOTE(review): the set of chars that is escaped is defined in string.cpp; confirm there.
 */
329 std::string escape( std::string const& text );
330 
/**
 * @brief Return a string where backslash-escaped characters are transformed into
 * their respective string form.
 */
342 std::string deescape( std::string const& text );
343 
/**
 * @brief Overload of deescape() for a single char.
 *
 * NOTE(review): presumably returns the char that the escape sequence consisting of a
 * backslash followed by c stands for; confirm in string.cpp.
 */
352 char deescape( char c );
353 
354 // =================================================================================================
355 // Output
356 // =================================================================================================
357 
/**
 * @brief Take a string and repeat it a given number of times.
 */
361 std::string repeat( std::string const& word, size_t times );
362 
/**
 * @brief Return a string representation of a size_t value with a fixed length,
 * that is, by adding leading zeros.
 *
 * The length defaults to 6.
 * NOTE(review): behavior for values that need more than length digits is defined in
 * string.cpp; confirm there.
 */
369 std::string to_string_leading_zeros( size_t value, size_t length = 6 );
370 
/**
 * @brief Return a precise string representation of the input value, using the provided
 * precision value.
 *
 * The precision defaults to 6.
 * NOTE(review): presumably precision is the number of digits after the decimal point;
 * confirm in string.cpp.
 */
382 std::string to_string_precise( double value, int precision = 6 );
383 
/**
 * @brief Return a string representation of the input value, using the provided
 * precision value.
 *
 * The precision defaults to 6.
 * NOTE(review): how this differs from to_string_precise() (presumably in the handling of
 * trailing zeros) is defined in string.cpp; confirm there.
 */
395 std::string to_string_rounded( double value, int precision = 6 );
396 
/**
 * @brief Return a string representation of a given value.
 *
 * The value is written to a default-constructed std::ostringstream via its operator <<,
 * so the result uses the default stream formatting for the given type.
 */
template <typename T>
std::string to_string_nice( T const& v )
{
    std::ostringstream stream;
    stream << v;
    return stream.str();
}
418 
/**
 * @brief Return a string where the elements of a container @p v are joined using the string
 * @p delimiter in between them.
 *
 * The container is traversed with a range-based for loop, so it needs to offer begin() and
 * end(). Each element is turned into text via its stream operator <<.
 *
 * @param v         Container whose elements are joined.
 * @param delimiter String that is put between two consecutive elements; defaults to ", ".
 * @return          The joined string; empty if the container is empty.
 */
template <typename T>
std::string join( T const& v, std::string const& delimiter = ", " )
{
    std::ostringstream s;

    // Track the first element with a flag instead of comparing element addresses.
    // Address comparison fails to compile for containers that yield proxies or temporaries
    // (such as std::vector<bool>), and breaks for element types that overload operator &.
    bool is_first = true;
    for( auto const& i : v ) {
        if( !is_first ) {
            s << delimiter;
        }
        is_first = false;
        s << i;
    }
    return s.str();
}
441 
448 template <>
449 inline std::string join<std::vector<unsigned char>>(
450  std::vector<unsigned char> const& v, std::string const& delimiter
451 ) {
452  std::ostringstream s;
453  for( auto const& i : v ) {
454  if( &i != &(*v.begin()) ) {
455  s << delimiter;
456  }
457  s << static_cast<int>( i );
458  }
459  return s.str();
460 }
461 
/**
 * @brief Return the bit representation of an unsigned int.
 *
 * The bits are written with the most significant bit first. The function is only available
 * for unsigned types, which is checked at compile time.
 *
 * @param x          Value whose bits are printed.
 * @param zero       Char used for unset bits; defaults to '0'.
 * @param one        Char used for set bits; defaults to '1'.
 * @param byte_space If true (default), a space is inserted between each group of 8 bits.
 * @return           String with `sizeof(T) * 8` bit chars, plus the separating spaces if
 *                   @p byte_space is set.
 */
template<typename T>
std::string to_bit_string(
    T const x, char const zero = '0', char const one = '1', bool const byte_space = true
) {
    static_assert(
        std::is_unsigned<T>::value,
        "Can only use to_bit_string() with unsigned types."
    );

    size_t const bit_count = sizeof(T) * 8;
    std::string binary;
    binary.reserve( bit_count + sizeof(T) );

    // Go from the most to the least significant bit, so that we can simply append.
    // We shift the value instead of a mask of type T: a mask of type bool can never hold
    // anything but bit 0, which would make all positions of a bool repeat its lowest bit.
    for( size_t pos = bit_count; pos > 0; --pos ) {
        size_t const bit = pos - 1;

        // Separate bytes: a space goes in front of the highest bit of every byte but the first.
        if( byte_space && pos < bit_count && pos % 8 == 0 ) {
            binary += ' ';
        }
        binary += ((( x >> bit ) & 1 ) != 0 ) ? one : zero;
    }
    return binary;
}
493 
494 } // namespace utils
495 } // namespace genesis
496 
497 #endif // include guard
genesis::utils::deescape
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form.
Definition: string.cpp:715
genesis::utils::indent
std::string indent(std::string const &text, std::string const &indentation)
Indent each line of text with indentation and return the result.
Definition: string.cpp:522
genesis::utils::trim_right
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces.
Definition: string.cpp:578
genesis::utils::tail
std::string tail(std::string const &text, size_t lines)
Return the last lines of the text.
Definition: string.cpp:316
genesis::utils::to_upper_ascii_inplace
void to_upper_ascii_inplace(std::string &str)
Turn the given string to all-uppercase, ASCII-only, in place.
Definition: string.cpp:675
genesis::utils::equals_ci
bool equals_ci(std::string const &lhs, std::string const &rhs)
Compare two strings case insensitive.
Definition: string.cpp:67
genesis::utils::replace_all
std::string replace_all(std::string const &text, std::string const &search, std::string const &replace)
Return a copy of a string, where all occurrences of a search string are replaced by a replace string.
Definition: string.cpp:530
genesis::utils::contains_ci
bool contains_ci(std::vector< std::string > const &haystack, std::string const &needle)
Return whether a vector of strings contains a given string, case insensitive.
Definition: string.cpp:56
genesis::utils::to_string_rounded
std::string to_string_rounded(double const value, int const precision)
Return a string representation of the input value, using the provided precision value (determining it...
Definition: string.cpp:786
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::to_upper_inplace
void to_upper_inplace(std::string &str)
Turn the given string to all-uppercase, locale-aware.
Definition: string.hpp:289
genesis::utils::trim
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:602
genesis::utils::to_lower_ascii_inplace
void to_lower_ascii_inplace(std::string &str)
Turn the given string to all-lowercase, ASCII-only.
Definition: string.cpp:651
genesis::utils::replace_all_chars
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:564
genesis::utils::to_string_leading_zeros
std::string to_string_leading_zeros(size_t value, size_t length)
Return a string representation of a size_t value with a fixed length, that is, by adding leading zero...
Definition: string.cpp:771
genesis::utils::to_string_precise
std::string to_string_precise(double const value, int const precision)
Return a precise string representation of the input value, using the provided precision value (determ...
Definition: string.cpp:778
genesis::utils::to_upper
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
Definition: char.hpp:230
genesis::utils::head
std::string head(std::string const &text, size_t lines)
Return the first lines of the text.
Definition: string.cpp:307
genesis::utils::starts_with
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:81
genesis::utils::compare_natural
int compare_natural(std::string const &lhs, std::string const &rhs)
Compare two strings with natural human sorting, that is "A1", "A2", "A100", instead of the standard s...
Definition: string.cpp:152
genesis::utils::to_upper_ascii
std::string to_upper_ascii(std::string const &str)
Return an all-uppercase copy of the given string, ASCII-only.
Definition: string.cpp:692
genesis::utils::join
Interval< DataType, NumericalType, IntervalKind > join(Interval< DataType, NumericalType, IntervalKind > const &a, Interval< DataType, NumericalType, IntervalKind > const &b)
Creates a new Interval that contains both intervals and whatever is between.
Definition: utils/containers/interval_tree/functions.hpp:127
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::ends_with
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:89
char.hpp
genesis::utils::to_string_nice
std::string to_string_nice(T const &v)
Return a string representation of a given value.
Definition: string.hpp:412
genesis::utils::to_lower_ascii
std::string to_lower_ascii(std::string const &str)
Return an all-lowercase copy of the given string, ASCII-only.
Definition: string.cpp:668
genesis::utils::repeat
std::string repeat(std::string const &word, size_t times)
Take a string and repeat it a given number of times.
Definition: string.cpp:758
genesis::utils::sort_natural
void sort_natural(RandomAccessIterator first, RandomAccessIterator last)
Sort a range of std::string (or convertible to std::string) elements, using natural sorting; see comp...
Definition: string.hpp:100
genesis::utils::wrap
std::string wrap(std::string const &text, size_t line_length)
Wrap a text at a given line_length.
Definition: string.cpp:483
genesis::utils::NaturalLess::operator()
bool operator()(T const &lhs, T const &rhs) const
Definition: string.hpp:90
genesis::utils::split_at
std::vector< std::string > split_at(std::string const &str, std::string const &delimiter, const bool trim_empty)
Split a string into parts, given a delimiter string.
Definition: string.cpp:424
genesis::utils::to_lower
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
Definition: char.hpp:221
genesis::utils::match_wildcards
bool match_wildcards(std::string const &str, std::string const &pattern)
Return whether a string is matched by a wildcard pattern containing ? and * for single and multiple (...
Definition: string.cpp:97
genesis::utils::to_bit_string
std::string to_bit_string(T const x, char const zero='0', char const one='1', bool const byte_space=true)
Return the bit representation of an unsigned int.
Definition: string.hpp:470
genesis::utils::trim_left
std::string trim_left(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with left trimmed white spaces.
Definition: string.cpp:590
genesis::utils::to_lower_inplace
void to_lower_inplace(std::string &str)
Turn the given string to all-lowercase, locale-aware.
Definition: string.hpp:267
genesis::utils::NaturalLess
Functor class to compare two strings with natural "human" sorting, see compare_natural().
Definition: string.hpp:89
genesis::utils::count_substring_occurrences
size_t count_substring_occurrences(std::string const &str, std::string const &sub)
Return the number of (possibly overlapping) occurrences of a substring in a string.
Definition: string.cpp:329
genesis::utils::split_range_list
std::vector< size_t > split_range_list(std::string const &str)
Split a string containing positive integer numbers into its parts and resolve ranges.
Definition: string.cpp:439
genesis::utils::split
std::vector< std::string > split(std::string const &str, std::string const &delimiters, const bool trim_empty)
Split a string into parts, given a delimiters set of chars.
Definition: string.cpp:386
genesis::utils::escape
std::string escape(std::string const &text)
Return a string where special chars are replaced by their escape sequence.
Definition: string.cpp:703