A toolkit for working with phylogenetic data.
v0.24.0
char.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_IO_CHAR_H_
2 #define GENESIS_UTILS_IO_CHAR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <cctype>
35 #include <climits>
36 #include <string>
37 #include <type_traits>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // =================================================================================================
43 // ASCII Char Functions
44 // =================================================================================================
45 
/**
 * @brief Implementation detail for is_ascii(char): overload selected when the char type is signed.
 *
 * For a signed type, every non-negative value (including 0) is accepted.
 */
template <typename T>
constexpr bool is_ascii(std::true_type /* signed */, T c) noexcept
{
    // Only negative values are rejected here.
    return 0 <= c;
}
55 
/**
 * @brief Implementation detail for is_ascii(char): overload selected when the char type is unsigned.
 *
 * For an unsigned type, values up to and including 0x7F (DEL) are accepted.
 */
template <typename T>
constexpr bool is_ascii(std::false_type /* unsigned */, T c) noexcept
{
    // 0x7F (DEL) is the highest accepted code.
    return 0x7F >= c;
}
65 
69 constexpr bool is_ascii(char c) noexcept
70 {
71  static_assert(0x7F <= CHAR_MAX, "The compiler is not using ASCII. We cannot work like that!");
72  return is_ascii(std::is_signed<char>{}, c);
73 }
74 
/**
 * @brief Return whether a char is a control character: 0x00 to 0x1F, or 0x7F (DEL).
 */
constexpr bool is_cntrl(char c) noexcept
{
    return (0x00 <= c && c <= 0x1F) || 0x7F == c;
}
83 
/**
 * @brief Return whether a char is a control character, excluding the codes 0x09 to 0x0D.
 *
 * Accepted are 0x00-0x08, 0x0E-0x1F, and 0x7F (DEL). The skipped range 0x09-0x0D covers
 * tab, new line, vertical tab, form feed, and carriage return.
 */
constexpr bool is_control(char c) noexcept
{
    return 0x7F == c
        || (0x00 <= c && c <= 0x08)
        || (0x0E <= c && c <= 0x1F);
}
91 
/**
 * @brief Return whether a char is a decimal digit ('0' to '9').
 */
constexpr bool is_digit(char c) noexcept
{
    // The range check below relies on the digits being contiguous.
    static_assert('0' + 9 == '9', "The compiler is not using ASCII. We cannot work like that!");
    return '0' <= c && c <= '9';
}
100 
104 constexpr bool is_xdigit(char c) noexcept
105 {
106  static_assert('A' + 5 == 'F', "The compiler is not using ASCII. We cannot work like that!");
107  static_assert('a' + 5 == 'f', "The compiler is not using ASCII. We cannot work like that!");
108  return is_digit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
109 }
110 
/**
 * @brief Return whether a char is a lower case letter ('a' to 'z').
 */
constexpr bool is_lower(char c) noexcept
{
    // The range check below relies on the lower case letters being contiguous.
    static_assert('a' + 25 == 'z', "The compiler is not using ASCII. We cannot work like that!");
    return 'a' <= c && c <= 'z';
}
119 
/**
 * @brief Return whether a char is an upper case letter ('A' to 'Z').
 */
constexpr bool is_upper(char c) noexcept
{
    // The range check below relies on the upper case letters being contiguous.
    static_assert('A' + 25 == 'Z', "The compiler is not using ASCII. We cannot work like that!");
    return 'A' <= c && c <= 'Z';
}
128 
132 constexpr bool is_alpha(char c) noexcept
133 {
134  return is_lower(c) || is_upper(c);
135 }
136 
140 constexpr bool is_alnum(char c) noexcept
141 {
142  return is_alpha(c) || is_digit(c);
143 }
144 
/**
 * @brief Return whether a char is a punctuation mark.
 *
 * Accepted are the four ranges '!' to '/', ':' to '@', '[' to '`', and '{' to '~'.
 */
constexpr bool is_punct(char c) noexcept
{
    // Spot checks that the character ranges used below are laid out as expected.
    static_assert('!' + 1 == '"', "The compiler is not using ASCII. We cannot work like that!");
    static_assert(':' + 1 == ';', "The compiler is not using ASCII. We cannot work like that!");
    static_assert('[' + 1 == '\\', "The compiler is not using ASCII. We cannot work like that!");
    static_assert('{' + 1 == '|', "The compiler is not using ASCII. We cannot work like that!");

    return ('!' <= c && c <= '/')
        || (':' <= c && c <= '@')
        || ('[' <= c && c <= '`')
        || ('{' <= c && c <= '~');
}
158 
/**
 * @brief Return whether a char lies in the range '!' to '~' (both inclusive).
 *
 * This is the same range as is_print(), but without the space character.
 */
constexpr bool is_graph(char c) noexcept
{
    return '!' <= c && c <= '~';
}
167 
/**
 * @brief Return whether a char is either a space or a tab character.
 */
constexpr bool is_blank(char c) noexcept
{
    return ' ' == c || '\t' == c;
}
175 
/**
 * @brief Return whether a char is either a new line or a carriage return character.
 */
constexpr bool is_newline(char c) noexcept
{
    return '\r' == c || '\n' == c;
}
183 
/**
 * @brief Return whether a char is a form feed or a vertical tab character.
 */
constexpr bool is_other_space(char c) noexcept
{
    return '\v' == c || '\f' == c;
}
192 
197 constexpr bool is_space(char c) noexcept
198 {
199  return is_blank(c) || is_newline(c) || is_other_space(c);
200 }
201 
/**
 * @brief Return whether a char lies in the range ' ' (space) to '~' (both inclusive).
 */
constexpr bool is_print(char c) noexcept
{
    return ' ' <= c && c <= '~';
}
210 
211 // =================================================================================================
212 // Conversion Functions
213 // =================================================================================================
214 
/**
 * @brief Return the lower case version of a letter.
 *
 * Chars in 'A' to 'Z' are shifted up by 0x20; every other char is returned unchanged.
 */
constexpr char to_lower(char c) noexcept
{
    // Upper and lower case letters are 0x20 apart.
    static_assert('z' - 'Z' == 0x20, "The compiler is not using ASCII. We cannot work like that!");
    return ( c < 'A' || c > 'Z' ) ? c : static_cast<char>( c + 0x20 );
}
223 
/**
 * @brief Return the upper case version of a letter.
 *
 * Chars in 'a' to 'z' are shifted down by 0x20; every other char is returned unchanged.
 */
constexpr char to_upper(char c) noexcept
{
    // Upper and lower case letters are 0x20 apart.
    static_assert('z' - 'Z' == 0x20, "The compiler is not using ASCII. We cannot work like that!");
    return ( c < 'a' || c > 'z' ) ? c : static_cast<char>( c - 0x20 );
}
232 
233 // =================================================================================================
234 // Chars Helper Functions
235 // =================================================================================================
236 
240 constexpr bool char_match_ci( char c1, char c2 ) noexcept
241 {
242  return to_lower( c1 ) == to_lower( c2 );
243  // Locale aware version, not used any more:
244  // return std::tolower( static_cast<unsigned char>( c1 )) == std::tolower( static_cast<unsigned char>( c2 ));
245 }
246 
/**
 * @brief Return the name and hex representation of a char.
 *
 * Declaration only; the definition is not part of this header.
 * NOTE(review): the effect of `full` (defaults to `true`) is not visible here - confirm against
 * the definition before relying on it.
 */
265 std::string char_to_hex( char c, bool full = true );
266 
/**
 * @brief Overload of char_to_hex() that takes an `unsigned char`.
 *
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably behaves like the `char` overload, including the meaning of `full`
 * (defaults to `true`) - confirm against the definition.
 */
273 std::string char_to_hex( unsigned char c, bool full = true );
274 
275 } // namespace utils
276 } // namespace genesis
277 
278 #endif // include guard
constexpr bool is_lower(char c) noexcept
Return whether a char is a lower case letter (a-z), ASCII-only.
Definition: char.hpp:114
constexpr bool is_control(char c) noexcept
Return whether a char is a control character, excluding white spaces, ASCII-only. ...
Definition: char.hpp:87
constexpr bool is_upper(char c) noexcept
Return whether a char is an upper case letter (A-Z), ASCII-only.
Definition: char.hpp:123
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
constexpr bool is_ascii(std::true_type, T c) noexcept
Implementation detail for is_ascii(char).
Definition: char.hpp:50
constexpr bool is_other_space(char c) noexcept
Return whether a char is some other white space character that is neither space, tab, new line, nor carriage return - that is, whether it is a form feed or a vertical tab.
Definition: char.hpp:188
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Definition: char.cpp:67
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
constexpr bool is_graph(char c) noexcept
Return whether a char has a graphical representation (any printable character except space), according to isgraph of the cctype header, but ASCII-only.
Definition: char.hpp:163
constexpr bool is_print(char c) noexcept
Return whether a char is a printable character, according to isprint of the cctype header...
Definition: char.hpp:206
constexpr bool is_xdigit(char c) noexcept
Return whether a char is a hexadecimal digit (0-9 or A-F or a-f), ASCII-only.
Definition: char.hpp:104
constexpr bool is_blank(char c) noexcept
Return whether a char is either a space or a tab character.
Definition: char.hpp:171
constexpr bool is_punct(char c) noexcept
Return whether a char is a punctuation mark, according to ispunct of the cctype header, but ASCII-only.
Definition: char.hpp:149
constexpr bool is_alpha(char c) noexcept
Return whether a char is a letter (a-z or A-Z), ASCII-only.
Definition: char.hpp:132
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
Definition: char.hpp:227
constexpr bool char_match_ci(char c1, char c2) noexcept
Return whether two chars are the same, case insensitive, and ASCII-only.
Definition: char.hpp:240
constexpr char to_lower(char c) noexcept
Return the lower case version of a letter, ASCII-only.
Definition: char.hpp:218
constexpr bool is_alnum(char c) noexcept
Return whether a char is a letter (a-z or A-Z) or a digit (0-9), ASCII-only.
Definition: char.hpp:140
constexpr bool is_space(char c) noexcept
Return whether a char is some form of white space character, so either space, tab, new line...
Definition: char.hpp:197
constexpr bool is_newline(char c) noexcept
Return whether a char is either a new line or a carriage return character.
Definition: char.hpp:179
constexpr bool is_cntrl(char c) noexcept
Return whether a char is a control character, according to iscntrl of the cctype header, but ASCII-only.
Definition: char.hpp:79