A library for working with phylogenetic and population genetic data.
v0.27.0
char.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
33 #include <array>
34 #include <cassert>
35 #include <cctype>
36 #include <iomanip>
37 #include <ios>
38 #include <iostream>
39 #include <sstream>
40 #include <stdexcept>
41 
42 namespace genesis {
43 namespace utils {
44 
45 // =================================================================================================
46 // Chars Functions
47 // =================================================================================================
48 
/**
 * @brief List of all ASCII symbols.
 *
 * The table is indexed by the ASCII code (0-127). Printable characters are stored as the
 * character itself; non-printable characters are stored as their usual abbreviations.
 */
static const std::array<std::string, 128> ascii_symbols_ = {{
    // 0x00 - 0x1F: control characters, given by their abbreviations
    "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
    "BS",  "HT",  "LF",  "VT",  "FF",  "CR",  "SO",  "SI",
    "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
    "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",

    // 0x20 - 0x2F: space and punctuation
    " ", "!", "\"", "#", "$", "%", "&", "'",
    "(", ")", "*",  "+", ",", "-", ".", "/",

    // 0x30 - 0x3F: digits and punctuation
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "<", "=", ">", "?",

    // 0x40 - 0x5F: at sign, capital letters, and punctuation
    "@", "A", "B", "C", "D",  "E", "F", "G",
    "H", "I", "J", "K", "L",  "M", "N", "O",
    "P", "Q", "R", "S", "T",  "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",

    // 0x60 - 0x7E: grave accent, small letters, and punctuation
    "`", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~",

    // 0x7F: the delete control character
    "DEL"
}};
66 
/**
 * @brief List of all ASCII names.
 *
 * The table is indexed by the ASCII code (0-127). Each entry consists of a category
 * ("control", "symbol", "digit", or "letter"), followed by a colon and a description
 * of the character.
 */
static const std::array<std::string, 128> ascii_names_ = {{
    // 0 - 31: control characters
    "control: null",
    "control: start of heading",
    "control: start of text",
    "control: end of text",
    "control: end of transmission",
    "control: enquiry",
    "control: acknowledge",
    "control: bell",
    "control: backspace",
    "control: horizontal tab",
    "control: NL line feed, new line",
    "control: vertical tab",
    "control: NP form feed, new page",
    "control: carriage return",
    "control: shift out",
    "control: shift in",
    "control: data link escape",
    "control: device control 1",
    "control: device control 2",
    "control: device control 3",
    "control: device control 4",
    "control: negative acknowledge",
    "control: synchronous idle",
    "control: end of trans. block",
    "control: cancel",
    "control: end of medium",
    "control: substitute",
    "control: escape",
    "control: file separator",
    "control: group separator",
    "control: record separator",
    "control: unit separator",

    // 32 - 47: space and punctuation
    "symbol: space",
    "symbol: exclamation mark",
    "symbol: double quotation mark",
    "symbol: number sign, pound",
    "symbol: dollar sign",
    "symbol: percent sign",
    "symbol: ampersand",
    "symbol: apostrophe, single quote mark",
    "symbol: left parenthesis",
    "symbol: right parenthesis",
    "symbol: asterisk",
    "symbol: plus sign",
    "symbol: comma",
    "symbol: minus sign, hyphen",
    "symbol: period, decimal point, full stop",
    "symbol: slash, virgule, solidus",

    // 48 - 57: digits
    "digit: 0", "digit: 1", "digit: 2", "digit: 3", "digit: 4",
    "digit: 5", "digit: 6", "digit: 7", "digit: 8", "digit: 9",

    // 58 - 64: punctuation
    "symbol: colon",
    "symbol: semicolon",
    "symbol: less-than sign",
    "symbol: equal sign",
    "symbol: greater-than sign",
    "symbol: question mark",
    "symbol: commercial at sign",

    // 65 - 90: capital letters
    "letter: capital A", "letter: capital B", "letter: capital C", "letter: capital D",
    "letter: capital E", "letter: capital F", "letter: capital G", "letter: capital H",
    "letter: capital I", "letter: capital J", "letter: capital K", "letter: capital L",
    "letter: capital M", "letter: capital N", "letter: capital O", "letter: capital P",
    "letter: capital Q", "letter: capital R", "letter: capital S", "letter: capital T",
    "letter: capital U", "letter: capital V", "letter: capital W", "letter: capital X",
    "letter: capital Y", "letter: capital Z",

    // 91 - 96: punctuation
    "symbol: left square bracket",
    "symbol: backslash, reverse solidus",
    "symbol: right square bracket",
    "symbol: spacing circumflex accent, caret",
    "symbol: spacing underscore, low line, horizontal bar",
    "symbol: spacing grave accent, back apostrophe",

    // 97 - 122: small letters
    "letter: small a", "letter: small b", "letter: small c", "letter: small d",
    "letter: small e", "letter: small f", "letter: small g", "letter: small h",
    "letter: small i", "letter: small j", "letter: small k", "letter: small l",
    "letter: small m", "letter: small n", "letter: small o", "letter: small p",
    "letter: small q", "letter: small r", "letter: small s", "letter: small t",
    "letter: small u", "letter: small v", "letter: small w", "letter: small x",
    "letter: small y", "letter: small z",

    // 123 - 126: punctuation
    "symbol: left brace, left curly bracket",
    "symbol: vertical bar",
    "symbol: right brace, right curly bracket",
    "symbol: tilde accent",

    // 127: the delete control character
    "control: delete"
}};
117 
/**
 * @brief Return the name and hex representation of a `char`.
 *
 * This overload only reinterprets the given `char` as an `unsigned char` and forwards to the
 * `unsigned char` overload of this function, which does the actual formatting.
 *
 * @param c    Character to describe. The whole byte range is accepted.
 * @param full Passed on unchanged to the `unsigned char` overload.
 * @return     The string produced by the `unsigned char` overload for the converted value.
 */
std::string char_to_hex( char c, bool full )
{
    // The standard leaves it to the implementation whether plain char is signed or unsigned.
    // Going through unsigned char maps potentially negative values to their mod 256 equivalent,
    // so that no check for the valid ASCII range is needed here.
    auto const byte = static_cast<unsigned char>( c );
    return char_to_hex( byte, full );
}
135 
136 std::string char_to_hex( unsigned char c, bool full )
137 {
138  std::stringstream ss;
139  if( full ) {
140  if( c < 128 ) {
141  if( std::isprint(c) ) {
142  assert( std::string(1, c) == ascii_symbols_[c] );
143  ss << "'" << std::string( 1, c ) << "'";
144  } else {
145  ss << ascii_symbols_[c];
146  }
147  } else {
148  ss << "non-ASCII char";
149  }
150 
151  ss << " (0x";
152  }
153  ss << std::hex << std::uppercase << std::setw(2) << std::setfill('0') << static_cast<int>( c );
154  if( full ) {
155  if( c < 128 ) {
156  ss << "; " << ascii_names_[c] << ")";
157  } else {
158  ss << ")";
159  }
160  }
161  return ss.str();
162 }
163 
164 } // namespace utils
165 } // namespace genesis
genesis::utils::ascii_names_
static const std::array< std::string, 128 > ascii_names_
List of all ASCII names.
Definition: char.cpp:76
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
char.hpp
genesis::utils::ascii_symbols_
static const std::array< std::string, 128 > ascii_symbols_
List of all ASCII symbols.
Definition: char.cpp:56
genesis::utils::char_to_hex
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Definition: char.cpp:118