A library for working with phylogenetic and population genetic data.
v0.27.0
char.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_TEXT_CHAR_H_
2 #define GENESIS_UTILS_TEXT_CHAR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <cctype>
35 #include <climits>
36 #include <string>
37 #include <type_traits>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // =================================================================================================
43 // ASCII Char Functions
44 // =================================================================================================
45 
/**
 * @brief Implementation detail for is_ascii(char), selected when the character type is signed.
 *
 * For a signed type, every value outside of the ASCII range is negative,
 * so a sign check is sufficient.
 */
template <typename T>
constexpr bool is_ascii(std::true_type /* signed */, T c) noexcept
{
    // Every non-negative value (including 0, NUL) is a valid ASCII code.
    return 0 <= c;
}

/**
 * @brief Implementation detail for is_ascii(char), selected when the character type is unsigned.
 *
 * For an unsigned type, the value is ASCII if it does not exceed `0x7F`.
 */
template <typename T>
constexpr bool is_ascii(std::false_type /* unsigned */, T c) noexcept
{
    // 0x7F (DEL) is the highest valid ASCII code.
    return 0x7F >= c;
}

/**
 * @brief Return whether a char is pure ASCII, that is, in the range `[0, 127]`.
 *
 * Dispatches on the signedness of `char` to one of the two helper overloads,
 * as `char` can be signed or unsigned, depending on the platform.
 */
constexpr bool is_ascii(char c) noexcept
{
    static_assert(CHAR_MAX >= 0x7F, "The compiler is not using ASCII. We cannot work like that!");
    return is_ascii(std::is_signed<char>::type{}, c);
}
74 
/**
 * @brief Return whether a char is a control character, ASCII-only.
 *
 * Control characters are the codes `0x00` to `0x1F`, as well as `0x7F` (DEL).
 * This includes the white space characters in that range; see is_control() for a variant
 * that excludes them.
 */
constexpr bool is_cntrl(char c) noexcept
{
    return (0x00 <= c && c <= 0x1F) || c == 0x7F;
}
83 
/**
 * @brief Return whether a char is a control character, excluding white spaces, ASCII-only.
 *
 * Same as is_cntrl(), except that the codes `0x09` to `0x0D`
 * (tab, new line, vertical tab, form feed, carriage return) are not matched.
 */
constexpr bool is_control(char c) noexcept
{
    return c == 0x7F
        || (0x00 <= c && c <= 0x08)
        || (0x0E <= c && c <= 0x1F);
}
91 
/**
 * @brief Return whether a char is a digit (0-9), ASCII-only.
 */
constexpr bool is_digit(char c) noexcept
{
    // Make sure that the digits are encoded as expected, and are consecutive.
    static_assert('0' == 48, "The compiler is not using ASCII. We cannot work like that!");
    static_assert('0' + 9 == '9', "The compiler is not using ASCII. We cannot work like that!");
    return '0' <= c && c <= '9';
}
101 
105 constexpr bool is_xdigit(char c) noexcept
106 {
107  static_assert('A' + 5 == 'F', "The compiler is not using ASCII. We cannot work like that!");
108  static_assert('a' + 5 == 'f', "The compiler is not using ASCII. We cannot work like that!");
109  return is_digit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
110 }
111 
/**
 * @brief Return whether a char is a lower case letter (a-z), ASCII-only.
 */
constexpr bool is_lower(char c) noexcept
{
    // Make sure that the lower case letters are encoded as expected, and are consecutive.
    static_assert('a' == 97, "The compiler is not using ASCII. We cannot work like that!");
    static_assert('a' + 25 == 'z', "The compiler is not using ASCII. We cannot work like that!");
    return 'a' <= c && c <= 'z';
}
121 
/**
 * @brief Return whether a char is an upper case letter (A-Z), ASCII-only.
 */
constexpr bool is_upper(char c) noexcept
{
    // Make sure that the upper case letters are encoded as expected, and are consecutive.
    static_assert('A' == 65, "The compiler is not using ASCII. We cannot work like that!");
    static_assert('A' + 25 == 'Z', "The compiler is not using ASCII. We cannot work like that!");
    return 'A' <= c && c <= 'Z';
}
131 
135 constexpr bool is_alpha(char c) noexcept
136 {
137  return is_lower(c) || is_upper(c);
138 }
139 
143 constexpr bool is_alnum(char c) noexcept
144 {
145  return is_alpha(c) || is_digit(c);
146 }
147 
/**
 * @brief Return whether a char is a punctuation mark, ASCII-only.
 *
 * This matches the four ranges of graphical characters that are neither letters nor digits:
 * `!` to `/`, `:` to `@`, `[` to `` ` ``, and `{` to `~`.
 */
constexpr bool is_punct(char c) noexcept
{
    // Spot check the beginning of each of the four ranges for the expected encoding.
    static_assert('!' + 1 == '"', "The compiler is not using ASCII. We cannot work like that!");
    static_assert(':' + 1 == ';', "The compiler is not using ASCII. We cannot work like that!");
    static_assert('[' + 1 == '\\', "The compiler is not using ASCII. We cannot work like that!");
    static_assert('{' + 1 == '|', "The compiler is not using ASCII. We cannot work like that!");

    return ('!' <= c && c <= '/')
        || (':' <= c && c <= '@')
        || ('[' <= c && c <= '`')
        || ('{' <= c && c <= '~');
}
161 
/**
 * @brief Return whether a char is a character with graphical representation, ASCII-only.
 *
 * These are all characters from `!` to `~`; in particular, the space character is not included.
 */
constexpr bool is_graph(char c) noexcept
{
    return '!' <= c && c <= '~';
}
170 
/**
 * @brief Return whether a char is either a space or a tab character.
 */
constexpr bool is_blank(char c) noexcept
{
    return c == ' ' || c == '\t';
}
178 
/**
 * @brief Return whether a char is either a new line or a carriage return character.
 */
constexpr bool is_newline(char c) noexcept
{
    return c == '\r' || c == '\n';
}
186 
/**
 * @brief Return whether a char is a form feed or a vertical tab character.
 *
 * These are the white space characters that are matched neither by is_blank()
 * nor by is_newline().
 */
constexpr bool is_other_space(char c) noexcept
{
    return c == '\v' || c == '\f';
}
195 
200 constexpr bool is_space(char c) noexcept
201 {
202  return is_blank(c) || is_newline(c) || is_other_space(c);
203 }
204 
/**
 * @brief Return whether a char is a printable character, ASCII-only.
 *
 * These are all characters from the space character to `~`, that is,
 * the same characters as matched by is_graph(), plus the space character.
 */
constexpr bool is_print(char c) noexcept
{
    return ' ' <= c && c <= '~';
}
213 
214 // =================================================================================================
215 // Conversion Functions
216 // =================================================================================================
217 
/**
 * @brief Return the lower case version of a letter, ASCII-only.
 *
 * Upper case letters (A-Z) are shifted by `0x20` to their lower case counterpart;
 * any other char is returned unchanged.
 */
constexpr char to_lower(char c) noexcept
{
    static_assert('z' - 'Z' == 0x20, "The compiler is not using ASCII. We cannot work like that!");
    return ( c < 'A' || 'Z' < c ) ? c : static_cast<char>( c + 0x20 );
}
226 
/**
 * @brief Return the upper case version of a letter, ASCII-only.
 *
 * Lower case letters (a-z) are shifted by `0x20` to their upper case counterpart;
 * any other char is returned unchanged.
 */
constexpr char to_upper(char c) noexcept
{
    static_assert('z' - 'Z' == 0x20, "The compiler is not using ASCII. We cannot work like that!");
    return ( c < 'a' || 'z' < c ) ? c : static_cast<char>( c - 0x20 );
}
235 
236 // =================================================================================================
237 // Chars Helper Functions
238 // =================================================================================================
239 
243 constexpr bool char_match_ci( char c1, char c2 ) noexcept
244 {
245  return to_lower( c1 ) == to_lower( c2 );
246  // Locale aware version, not used any more:
247  // return std::tolower( static_cast<unsigned char>( c1 )) == std::tolower( static_cast<unsigned char>( c2 ));
248 }
249 
/**
 * @brief Return the name and hex representation of a char.
 *
 * Only declared in this header; the definition is not part of this file.
 *
 * @param c    Character to describe.
 * @param full Defaults to `true`. NOTE(review): the effect of this flag on the resulting string
 *             is not visible from the declaration; confirm against the definition.
 */
268 std::string char_to_hex( char c, bool full = true );
269 
/**
 * @brief Overload of char_to_hex() that takes an `unsigned char`.
 *
 * Only declared in this header; the definition is not part of this file.
 * NOTE(review): presumably yields the same kind of description as the `char` overload,
 * and `full` (default `true`) presumably has the same meaning; confirm against the definition.
 */
276 std::string char_to_hex( unsigned char c, bool full = true );
277 
278 } // namespace utils
279 } // namespace genesis
280 
281 #endif // include guard
genesis::utils::is_graph
constexpr bool is_graph(char c) noexcept
Return whether a char is a character with graphical representation, according to isgraph of the cctyp...
Definition: char.hpp:166
genesis::utils::is_control
constexpr bool is_control(char c) noexcept
Return whether a char is a control character, excluding white spaces, ASCII-only.
Definition: char.hpp:87
genesis::utils::is_ascii
constexpr bool is_ascii(std::true_type, T c) noexcept
Implementation detail for is_ascii(char).
Definition: char.hpp:50
genesis::utils::char_match_ci
constexpr bool char_match_ci(char c1, char c2) noexcept
Return whether two chars are the same, case insensitive, and ASCII-only.
Definition: char.hpp:243
genesis::utils::is_punct
constexpr bool is_punct(char c) noexcept
Return whether a char is a punctuation mark, according to ispunct of the cctype header,...
Definition: char.hpp:152
genesis::utils::is_alnum
constexpr bool is_alnum(char c) noexcept
Return whether a char is a letter (a-z or A-Z) or a digit (0-9), ASCII-only.
Definition: char.hpp:143
genesis::utils::to_upper
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
Definition: char.hpp:230
genesis::utils::is_lower
constexpr bool is_lower(char c) noexcept
Return whether a char is a lower case letter (a-z), ASCII-only.
Definition: char.hpp:115
genesis::utils::is_xdigit
constexpr bool is_xdigit(char c) noexcept
Return whether a char is a hexadecimal digit (0-9 or A-F or a-f), ASCII-only.
Definition: char.hpp:105
genesis::utils::is_space
constexpr bool is_space(char c) noexcept
Return whether a char is some form of white space character, so either space, tab, new line,...
Definition: char.hpp:200
genesis::utils::is_other_space
constexpr bool is_other_space(char c) noexcept
Return whether a char is some other white space character that is neither space, tab,...
Definition: char.hpp:191
genesis::utils::is_print
constexpr bool is_print(char c) noexcept
Return whether a char is a printable character, according to isprint of the cctype header,...
Definition: char.hpp:209
genesis::utils::is_newline
constexpr bool is_newline(char c) noexcept
Return whether a char is either a new line or a carriage return character.
Definition: char.hpp:182
genesis::utils::is_cntrl
constexpr bool is_cntrl(char c) noexcept
Return whether a char is a control character, according to iscntrl of the cctype header, but ASCII-onl...
Definition: char.hpp:79
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::is_digit
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
genesis::utils::is_blank
constexpr bool is_blank(char c) noexcept
Return whether a char is either a space or a tab character.
Definition: char.hpp:174
genesis::utils::char_to_hex
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Definition: char.cpp:118
genesis::utils::to_lower
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
Definition: char.hpp:221
genesis::utils::is_upper
constexpr bool is_upper(char c) noexcept
Return whether a char is an upper case letter (A-Z), ASCII-only.
Definition: char.hpp:125
genesis::utils::is_alpha
constexpr bool is_alpha(char c) noexcept
Return whether a char is a letter (a-z or A-Z), ASCII-only.
Definition: char.hpp:135