A library for working with phylogenetic and population genetic data.
v0.27.0
parser.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
35 
36 #include <cassert>
37 #include <cctype>
38 #include <limits>
39 #include <stdexcept>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // General Number String
46 // =================================================================================================
47 
48 std::string parse_number_string(
49  utils::InputStream& source
50 ) {
51  // Parse the format [+-][123][.456][eE[+-]789]
52  std::string result;
53 
54  // Need to keep track whether we found a number.
55  bool found_digits = false;
56 
57  // Sign.
58  if( source && ( *source == '+' || *source == '-' )) {
59  result += *source;
60  ++source;
61  }
62 
63  // Integer part. Read while char is digit.
64  while( source && utils::is_digit( *source )) {
65  result += *source;
66  ++source;
67  found_digits = true;
68  }
69 
70  // Decimal dot?
71  if( source && *source == '.' ) {
72  result += '.';
73  ++source;
74  }
75 
76  // Decimal part. Read while char is digit.
77  while( source && utils::is_digit( *source )) {
78  result += *source;
79  ++source;
80  found_digits = true;
81  }
82 
83  // If there was no match so far, stop here.
84  // Otherwise, a string starting with "E" will be read as a number...
85  if( ! found_digits ) {
86  return result;
87  }
88 
89  // Is there an exponent? If not, we are done.
90  if( source && char_match_ci( *source, 'e' ) ) {
91  result += *source;
92  ++source;
93  } else {
94  return result;
95  }
96 
97  // Sign.
98  if( source && ( *source == '+' || *source == '-' )) {
99  result += *source;
100  ++source;
101  }
102 
103  // Exponent. Read while char is digit.
104  while( source && utils::is_digit( *source )) {
105  result += *source;
106  ++source;
107  }
108 
109  return result;
110 }
111 
112 // =================================================================================================
113 // String
114 // =================================================================================================
115 
117  utils::InputStream& source,
118  bool use_escapes,
119  bool use_twin_quotes,
120  bool include_qmarks
121 ) {
122  // Prepare the return value.
123  std::string value = "";
124 
125  // Nothing to do.
126  if( !source ) {
127  return value;
128  }
129 
130  // Read the introductory quotation mark. We will read until it appears again.
131  char qmark = *source;
132  ++source;
133 
134  // Include the quotation mark if needed.
135  if( include_qmarks ) {
136  value += qmark;
137  }
138 
139  bool found_closing_qmark = false;
140  while( source ) {
141 
142  // Treat quotation marks.
143  if( *source == qmark ) {
144  ++source;
145 
146  // This is the end if we are not looking for double qmarks.
147  if( ! use_twin_quotes ) {
148  found_closing_qmark = true;
149  break;
150  }
151 
152  // If we are here, this is potentially a double qmark.
153  // If so, it belongs to the result string. If not, this is the end.
154  if( source && *source == qmark ) {
155  value += qmark;
156  } else {
157  found_closing_qmark = true;
158  break;
159  }
160 
161  // Treat escape sequences.
162  } else if( *source == '\\' && use_escapes ) {
163 
164  // Skip the backslash.
165  ++source;
166 
167  // We found an escaping backslash. This cannot be the end of the stream.
168  if( !source ) {
169  throw std::runtime_error(
170  "Unexpected end of " + source.source_name() + " at " + source.at()
171  + ". Expecting escape sequence."
172  );
173  }
174 
175  // Turn the char after the backslash into its correct de-escaped char.
176  value += deescape( *source );
177 
178  // Treat normal (non-escape) chars.
179  } else {
180  value += *source;
181  }
182 
183  // Next char.
184  ++source;
185  }
186 
187  // We need to find the closing qmark, otherwise it's an error.
188  // This case only occurs if the stream ends before the qmark is found, so assert this.
189  // (This is not true the other way round: the stream can have reached its end right after
190  // the closing qmark!)
191  if( ! found_closing_qmark ) {
192  assert( ! source );
193  throw std::runtime_error(
194  "Unexpected end of " + source.source_name() + " at " + source.at()
195  + ". Expected closing quotation mark."
196  );
197  }
198 
199  // Finish the return value.
200  if( include_qmarks ) {
201  value += qmark;
202  }
203  return value;
204 }
205 
206 } // namespace utils
207 } // namespace genesis
genesis::utils::InputStream::at
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Definition: input_stream.hpp:481
genesis::utils::InputStream
Stream interface for reading data from an InputSource that keeps track of line and column counters.
Definition: input_stream.hpp:81
parser.hpp
genesis::utils::deescape
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form.
Definition: string.cpp:715
genesis::utils::InputStream::source_name
std::string source_name() const
Get the input source name where this stream reads from.
Definition: input_stream.hpp:522
genesis::utils::char_match_ci
constexpr bool char_match_ci(char c1, char c2) noexcept
Return whether two chars are the same, case insensitive, and ASCII-only.
Definition: char.hpp:243
string.hpp
Provides some commonly used string utility functions.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::is_digit
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
char.hpp
scanner.hpp
genesis::utils::parse_quoted_string
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:116
genesis::utils::parse_number_string
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:48