A library for working with phylogenetic and population genetic data.
v0.32.0
parser.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@sund.ku.dk>
20  University of Copenhagen, Globe Institute, Section for GeoGenetics
21  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
22 */
23 
35 
36 #include <algorithm>
37 #include <cassert>
38 #include <cctype>
39 #include <limits>
40 #include <stdexcept>
41 
42 // For C++17, we have a little speedup in the integer parsing part.
43 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
44 
45  #include <charconv>
46 
47 #endif
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // Integer
54 // =================================================================================================
55 
56 // -------------------------------------------------------------------------
57 // parse_unsigned_integer_gcc_intrinsic_
58 // -------------------------------------------------------------------------
59 
60 // Only use intrinsics version for the compilers that support them!
61 #if defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
62 
63 // Forward declaration
64 size_t parse_unsigned_integer_naive_( utils::InputStream& source );
65 
70 size_t parse_unsigned_integer_gcc_intrinsic_( utils::InputStream& source )
71 {
72  // This function only works on little endian systems (I think).
73  // We do not check this here, as so far, no one has tried to run our code on any machine
74  // that is not little endian. So we are good for now. In case this code needs to be adapted
75  // to big endian as well: I think the only change required is the `chunk <<= ...` that needs
76  // to turn into a right shift instead. Not entirely sure though.
77  // Also, in this function, we make use of the fact that our internal buffer is always way larger
78  // than any valid integer input. That is, we may read from after the block end, or even the
79  // stream end, but we have enough buffer for this to be okay (after all, we are just reading
80  // eight bytes here). We then check for this later.
81 
82  // Get the internals of the input stream.
83  auto buffer_pair = source.buffer();
84  auto buffer = buffer_pair.first;
85  size_t data_pos = 0;
86  size_t data_end = buffer_pair.second;
87 
88  // Copy 8 bytes into a chunk that we process as one unit.
89  std::uint64_t chunk = 0;
90  std::memcpy( &chunk, &buffer[ data_pos ], sizeof( chunk ));
91 
92  // Helper macro functions to check whether a word has bytes that are less than or greater
93  // than some specified value, and mark these bytes.
94  // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
95  // http://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord
96  auto const zero = static_cast<uint64_t>(0);
97  #define hasless(x,n) (((x)-~zero/255*(n))&~(x)&~zero/255*128)
98  #define hasmore(x,n) ((((x)+~zero/255*(127-(n)))|(x))&~zero/255*128)
99 
100  // Get all positions that are not digits, by marking a bit in their respective byte.
101  auto const l = hasless( chunk, '0' );
102  auto const m = hasmore( chunk, '9' );
103  auto const p = l | m;
104 
105  // Example:
106  // String "167\n253\n" turns into chunk c (on little endian systems)
107  // \n 3 5 2 \n 7 6 1
108  // c 00001010 00110011 00110101 00110010 00001010 00110111 00110110 00110001
109  // l 10000000 00000000 00000000 00000000 10000000 00000000 00000000 00000000
110  // m 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
111  // p 10000000 00000000 00000000 00000000 10000000 00000000 00000000 00000000
112  // ^ ^
113  // with the two '\n' bytes marked.
114 
115  #undef hasless
116  #undef hasmore
117 
118  // Find the index of the first byte that is not a digit. We first get the bit position
119  // using an intrinsic, and then divite by 8 to get the byte. The branching to select the
120  // correct intrinsic should be resolved at compile time already.
121  // We are using __builtin_ffs and its variants:
122  // Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
123  // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#Other-Builtins
124  int idx = 0;
125  if( sizeof(int) == sizeof(std::uint64_t) ) {
126  idx = __builtin_ffs(p) / 8;
127  } else if( sizeof(long) == sizeof(std::uint64_t) ) {
128  idx = __builtin_ffsl(p) / 8;
129  } else if( sizeof(long long) == sizeof(std::uint64_t) ) {
130  idx = __builtin_ffsll(p) / 8;
131  } else {
132  static_assert(
133  ( sizeof(int) == sizeof(std::uint64_t) ) ||
134  ( sizeof(long) == sizeof(std::uint64_t) ) ||
135  ( sizeof(long long) == sizeof(std::uint64_t) ),
136  "No compilter intrinsic __builtin_ffs[l][l] for std::uint64_t"
137  );
138  throw std::runtime_error(
139  "No compilter intrinsic __builtin_ffs[l][l] for std::uint64_t"
140  );
141  }
142  assert( 0 <= idx && idx <= 8 );
143 
144  // Not needed but kept for reference: Mask out all bits that we do not want.
145  // auto const mask = ~(~zero << ((idx-1)*8));
146  // chunk &= mask;
147 
148  // On little endian systems, we need to move the actual data chars that we want to parse to the
149  // left-most position for the following code to work. So, for our example from above, we need
150  // to move the "xxxx x761" in the chunk so that we get "7610 0000".
151  chunk <<= (8 * ( 8 - idx + 1 ));
152 
153  // Now use an O(log(n)) method of computing the result, where we combine adjacent parts into
154  // numbers, first 2 bytes, then 4 bytes, then all 8 bytes. Inspired by parse_8_chars() from
155  // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
156 
157  // 1-byte mask trick (works on 4 pairs of single digits)
158  std::uint64_t lower_digits = (chunk & 0x0f000f000f000f00) >> 8;
159  std::uint64_t upper_digits = (chunk & 0x000f000f000f000f) * 10;
160  chunk = lower_digits + upper_digits;
161 
162  // 2-byte mask trick (works on 2 pairs of two digits)
163  lower_digits = (chunk & 0x00ff000000ff0000) >> 16;
164  upper_digits = (chunk & 0x000000ff000000ff) * 100;
165  chunk = lower_digits + upper_digits;
166 
167  // 4-byte mask trick (works on pair of four digits)
168  lower_digits = (chunk & 0x0000ffff00000000) >> 32;
169  upper_digits = (chunk & 0x000000000000ffff) * 10000;
170  chunk = lower_digits + upper_digits;
171 
172  // Edge cases. We treat them at the end, so that in the standard cases, the processor
173  // does not come to a grinding halt when trying to figure out if these cases apply;
174  // this might be premature optimization, but in our tests, it made the function slightly faster.
175  // If the returned index is 0, there was no non-digit byte in the chunk,
176  // so we run the naive loop instead. We could also call this function here again recursively,
177  // summing up parts of large numbers. But that would mean that we need to do overflow
178  // detection and all that, and currently, this does not seem needed. Let's be lazy today.
179  // Furthermore, if the 8 bytes that we process here are at the end of the stream, we cannot
180  // confidently use them, in cases for example where the stream ends in a number, but does
181  // not have a new line char at the end. So in that case, better parse naievely.
182  // Lastly, if the index is 1, the first byte is not a digit, which is an error, as this function
183  // is only called from parsers that expect a number.
184  if( idx == 0 || data_pos + 8 >= data_end ) {
185  return parse_unsigned_integer_naive_( source );
186  }
187  if( idx == 1 ) {
188  throw std::runtime_error(
189  "Expecting integer in " + source.source_name() + " at " + source.at() + "."
190  );
191  }
192 
193  // Now move as far as needed in the buffer, and return our findings.
194  data_pos += idx - 1;
195  source.jump_unchecked( data_pos );
196  return chunk;
197 }
198 
199 #endif // defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
200 
201 // -------------------------------------------------------------------------
202 // parse_unsigned_integer_from_chars_
203 // -------------------------------------------------------------------------
204 
205 // Completely deactivated for now, as we are not using it anyway,
206 // and it's causing trouble with clang 5-7.
207 // #if ( defined(__GNUC__) || defined(__GNUG__) ) && ( !defined(__clang__) || ( __clang_major__ >= 8 ))
208 
209 // We used to have the following in our main CMakeList.txt to try to support this,
210 // but that led to linker errors downstream, and was hence not worth keeping.
211 // Keeping it here for future reference, but likely not needed any more.
212 
213 // # With clang 5 to 7, we run into a bug (https://stackoverflow.com/a/49795448) of clang,
214 // # because we are using `__builtin_mul_overflow` in genesis/utils/io/input_stream.cpp
215 // # This here tries to fix this. If this causes more trouble in the future, we might instead
216 // # use the native algorithm in that function...
217 // # Update: Yes, that causes trouble, as we then get linker errors:
218 // # undefined reference to symbol '_Unwind_Resume@@GCC_3.0'
219 // # see https://stackoverflow.com/a/22774687 - so instead we deactivate this hack here for now,
220 // # and use a different implementation in input_stream.cpp instead when using clang 5-7.
221 // # if(
222 // # "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang"
223 // # AND ( NOT ( ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS "5" ))
224 // # AND ( ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS "8" )
225 // # )
226 // # message(STATUS "Building with Clang 5, 6 or 7. Switching to --rtlib=compiler-rt")
227 // #
228 // # set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
229 // # set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --rtlib=compiler-rt" )
230 // #
231 // # set( GENESIS_CXX_FLAGS "${GENESIS_CXX_FLAGS} ")
232 // # set( GENESIS_EXE_LINKER_FLAGS "${GENESIS_EXE_LINKER_FLAGS} --rtlib=compiler-rt" )
233 // # endif()
234 
235 // This implementation needs __builtin_mul_overflow, which is buggy on Clang 5-7.
236 // We currently do not use it anyway, so let's deactivate it completely for now.
237 // #if ( defined(__GNUC__) || defined(__GNUG__) ) && ( !defined(__clang__) || ( __clang_major__ >= 8 ))
238 #if 0
239 
244 size_t parse_unsigned_integer_from_chars_( utils::InputStream& source )
245 {
246  // Re-implementation of the gcc from_chars() code.
247  // https://github.com/gcc-mirror/gcc/blob/12bb62fbb47bd2848746da53c72ed068a4274daf/libstdc++-v3/include/std/charconv
248  // Currently not in use and not well tested!
249 
250  // Get the internals of the input stream.
251  auto buffer_pair = source.buffer();
252  auto buffer = buffer_pair.first;
253  size_t data_pos = 0;
254  size_t data_end = buffer_pair.second;
255 
256  // Prepare. We alias T, in case we want to refactor to a template function at some point.
257  using T = size_t;
258  using namespace utils;
259  T x = 0;
260 
261  // Hardcoded base 10. See below for other version that allows to select base.
262  auto raise_and_add_ = []( T& val, unsigned char c ) {
263  return !(
264  __builtin_mul_overflow( val, 10, &val ) ||
265  __builtin_add_overflow( val, c, &val )
266  );
267  };
268  // int const base = 10;
269  // auto raise_and_add_ = [base]( T& val, unsigned char c ) {
270  // return !(
271  // __builtin_mul_overflow( val, base, &val ) ||
272  // __builtin_add_overflow( val, c, &val )
273  // );
274  // };
275 
276  auto from_chars_digit_ = [&]( char const*& first, char const* last, T& val ) {
277  while( first != last ) {
278  char const c = *first;
279  if( is_digit(c) ) {
280  if( !raise_and_add_(val, c - '0') ) {
281  return false;
282  }
283  first++;
284  } else {
285  return true;
286  }
287  }
288  return true;
289  };
290 
291  char const* start = &buffer[ data_pos ];
292  char const* end = &buffer[ data_end ];
293  auto const valid = from_chars_digit_( start, end, x );
294  auto const dist = start - &buffer[ data_pos ];
295 
296  if( dist == 0 ) {
297  throw std::runtime_error(
298  "Expecting integer in " + source_name() + " at " + at() + "."
299  );
300  } else if( !valid ) {
301  throw std::overflow_error(
302  "Numerical overflow in " + source_name() + " at " + at() + "."
303  );
304  } else if( std::is_signed<T>::value ) {
305  assert( false );
306  // T tmp;
307  // if (__builtin_mul_overflow(x, sign, &tmp)) {
308  // throw std::overflow_error(
309  // "Numerical overflow in " + source_name() + " at " + at() + "."
310  // );
311  // }
312  }
313 
314  // Move to where we the parsing left us.
315  data_pos += dist;
316  source.jump_unchecked( data_pos );
317  return x;
318 }
319 
320 #endif // ( defined(__GNUC__) || defined(__GNUG__) ) && ( !defined(__clang__) || ( __clang_major__ >= 8 ))
321 
322 // -------------------------------------------------------------------------
323 // parse_unsigned_integer_std_from_chars_
324 // -------------------------------------------------------------------------
325 
326 // Only use C++17 code if we are compiled with that version.
327 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
328 
333 size_t parse_unsigned_integer_std_from_chars_( utils::InputStream& source )
334 {
335  // Uses the C++17 std::from_chars() function.
336  // Currently not in use and not well tested!
337 
338  // Get the internals of the input stream.
339  auto buffer_pair = source.buffer();
340  auto buffer = buffer_pair.first;
341  size_t data_pos = 0;
342  size_t data_end = buffer_pair.second;
343 
344  // Prepare. We alias T, in case we want to refactor to a template function at some point.
345  using T = size_t;
346  using namespace utils;
347  T x = 0;
348 
349  // Fastest method accoing to
350  // https://www.fluentcpp.com/2018/07/27/how-to-efficiently-convert-a-string-to-an-int-in-c/
351  // is from_chars(), so let's us it!
352 
353  auto const conv = std::from_chars( &buffer[ data_pos ], &buffer[ data_end ], x );
354 
355  // How many chars did we consume?
356  auto const dist = conv.ptr - &buffer[ data_pos ];
357 
358  // Check that we processed at least one digit, as this function is only called when the
359  // input format requires an integer. This is equivalent to the check in the non C++17 version
360  // below for data_pos >= data_end || ! is_digit( buffer[ data_pos ] )
361  if( dist == 0 ) {
362  throw std::runtime_error(
363  "Expecting integer in " + source.source_name() + " at " + source.at() + "."
364  );
365  }
366 
367  if( conv.ec != std::errc() ) {
368  if( conv.ec == std::errc::result_out_of_range ) {
369  throw std::overflow_error(
370  "Numerical overflow in " + source.source_name() + " at " + source.at() + "."
371  );
372  } else if( conv.ec == std::errc::invalid_argument ) {
373  // Cannot happen, as we above checked that there is at least one digit.
374  assert( false );
375  } else {
376  // Cannot happen, as we caught every case of `ec`.
377  assert( false );
378  }
379 
380  // In either case, we need to stop here.
381  throw std::overflow_error(
382  "Integer parsing error in " + source.source_name() + " at " + source.at() + "."
383  );
384  }
385 
386  // Move to where we the parsing left us.
387  data_pos += dist;
388  source.jump_unchecked( data_pos );
389  return x;
390 }
391 
392 #endif // ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
393 
394 // -------------------------------------------------------------------------
395 // parse_unsigned_integer_naive_
396 // -------------------------------------------------------------------------
397 
402 {
403  // Get the internals of the input stream.
404  auto buffer_pair = source.buffer();
405  auto buffer = buffer_pair.first;
406  size_t data_pos = 0;
407  size_t data_end = buffer_pair.second;
408 
409  // Prepare. We alias T, in case we want to refactor to a template function at some point.
410  using T = size_t;
411  using namespace utils;
412  T x = 0;
413 
414  if( data_pos >= data_end || ! utils::is_digit( buffer[ data_pos ] ) ) {
415  throw std::runtime_error(
416  "Expecting digit in " + source.source_name() + " at " + source.at() + "."
417  );
418  }
419 
420  while(( data_pos < data_end ) && utils::is_digit( buffer[ data_pos ] )) {
421  T y = buffer[ data_pos ] - '0';
422 
423  if( x > ( std::numeric_limits<T>::max() - y ) / 10 ) {
424  throw std::overflow_error(
425  "Numerical overflow in " + source.source_name() + " at " + source.at() + "."
426  );
427  }
428 
429  x = 10 * x + y;
430 
431  // In the original function that was not part of this class, we simply called
432  // advance() here, to move to the next char. However, here, we already know that
433  // we have data_pos < data_end, and that we do not have a new line.
434  // Furthermore, we also can ignore the update for block length while in this loop
435  // (or maybe even completely), as it does not matter much if we move a bit into the
436  // second block before starting the reading thread again. This loop here cannot
437  // iterate that many times anyway before we overflow the interger.
438  // So let's simply move on to the next char.
439  assert( data_pos < data_end );
440  assert( buffer[ data_pos ] != '\n' );
441  ++data_pos;
442  }
443 
444  // Move to where we the parsing left us.
445  source.jump_unchecked( data_pos );
446  return x;
447 }
448 
449 // -------------------------------------------------------------------------
450 // parse_unsigned_integer_size_t
451 // -------------------------------------------------------------------------
452 
454 {
455  // Select the fastest alternative available for a given compiler and C++ version.
456  #if defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
457 
458  // If we have GCC or Clang, use our own handcrafted fast-as-hell implementation.
459  return parse_unsigned_integer_gcc_intrinsic_( source );
460 
461  // #elif ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
462  //
463  // // Otherwise, if this is C++17, at least use its own fast version,
464  // // that can use some compiler intrinsics itself.
465  // return parse_unsigned_integer_std_from_chars_();
466 
467  #else
468 
469  // If neither, just use the slow, naive loop.
470  return parse_unsigned_integer_naive_( source );
471 
472  #endif
473 }
474 
475 // =================================================================================================
476 // General Number String
477 // =================================================================================================
478 
480  utils::InputStream& source
481 ) {
482  // Parse the format [+-][123][.456][eE[+-]789]
483  std::string result;
484 
485  // Need to keep track whether we found a number.
486  bool found_digits = false;
487 
488  // Sign.
489  if( source && ( *source == '+' || *source == '-' )) {
490  result += *source;
491  ++source;
492  }
493 
494  // Integer part. Read while char is digit.
495  while( source && utils::is_digit( *source )) {
496  result += *source;
497  ++source;
498  found_digits = true;
499  }
500 
501  // Decimal dot?
502  if( source && *source == '.' ) {
503  result += '.';
504  ++source;
505  }
506 
507  // Decimal part. Read while char is digit.
508  while( source && utils::is_digit( *source )) {
509  result += *source;
510  ++source;
511  found_digits = true;
512  }
513 
514  // If there was no match so far, stop here.
515  // Otherwise, a string starting with "E" will be read as a number...
516  if( ! found_digits ) {
517  return result;
518  }
519 
520  // Is there an exponent? If not, we are done.
521  if( source && char_match_ci( *source, 'e' ) ) {
522  result += *source;
523  ++source;
524  } else {
525  return result;
526  }
527 
528  // Sign.
529  if( source && ( *source == '+' || *source == '-' )) {
530  result += *source;
531  ++source;
532  }
533 
534  // Exponent. Read while char is digit.
535  while( source && utils::is_digit( *source )) {
536  result += *source;
537  ++source;
538  }
539 
540  return result;
541 }
542 
543 // =================================================================================================
544 // String
545 // =================================================================================================
546 
548  utils::InputStream& source,
549  bool use_escapes,
550  bool use_twin_quotes,
551  bool include_qmarks
552 ) {
553  // Prepare the return value.
554  std::string value = "";
555 
556  // Nothing to do.
557  if( !source ) {
558  return value;
559  }
560 
561  // Read the introductory quotation mark. We will read until it appears again.
562  char qmark = *source;
563  ++source;
564 
565  // Include the quotation mark if needed.
566  if( include_qmarks ) {
567  value += qmark;
568  }
569 
570  bool found_closing_qmark = false;
571  while( source ) {
572 
573  // Treat quotation marks.
574  if( *source == qmark ) {
575  ++source;
576 
577  // This is the end if we are not looking for double qmarks.
578  if( ! use_twin_quotes ) {
579  found_closing_qmark = true;
580  break;
581  }
582 
583  // If we are here, this is potentially a double qmark.
584  // If so, it belongs to the result string. If not, this is the end.
585  if( source && *source == qmark ) {
586  value += qmark;
587  } else {
588  found_closing_qmark = true;
589  break;
590  }
591 
592  // Treat escape sequences.
593  } else if( *source == '\\' && use_escapes ) {
594 
595  // Skip the backslash.
596  ++source;
597 
598  // We found an escaping backslash. This cannot be the end of the stream.
599  if( !source ) {
600  throw std::runtime_error(
601  "Unexpected end of " + source.source_name() + " at " + source.at()
602  + ". Expecting escape sequence."
603  );
604  }
605 
606  // Turn the char after the backslash into its correct de-escaped char.
607  value += deescape( *source );
608 
609  // Treat normal (non-escape) chars.
610  } else {
611  value += *source;
612  }
613 
614  // Next char.
615  ++source;
616  }
617 
618  // We need to find the closing qmark, otherwise it's an error.
619  // This case only occurs if the stream ends before the qmark is found, so assert this.
620  // (This is not true the other way round: the stream can have reached its end right after
621  // the closing qmark!)
622  if( ! found_closing_qmark ) {
623  assert( ! source );
624  throw std::runtime_error(
625  "Unexpected end of " + source.source_name() + " at " + source.at()
626  + ". Expected closing quotation mark."
627  );
628  }
629 
630  // Finish the return value.
631  if( include_qmarks ) {
632  value += qmark;
633  }
634  return value;
635 }
636 
637 } // namespace utils
638 } // namespace genesis
genesis::utils::InputStream::at
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Definition: input_stream.hpp:437
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
parser.hpp
genesis::utils::deescape
std::string deescape(std::string const &text)
Return a string where backslash-escaped characters are transformed into their respective string form.
Definition: string.cpp:958
genesis::utils::InputStream::source_name
std::string source_name() const
Get the input source name where this stream reads from.
Definition: input_stream.hpp:478
genesis::utils::InputStream::buffer
std::pair< char const *, size_t > buffer()
Direct access to the internal buffer.
Definition: input_stream.hpp:390
genesis::utils::char_match_ci
constexpr bool char_match_ci(char c1, char c2) noexcept
Return whether two chars are the same, case insensitive, and ASCII-only.
Definition: char.hpp:243
string.hpp
Provides some commonly used string utility functions.
genesis::utils::parse_unsigned_integer_size_t
size_t parse_unsigned_integer_size_t(utils::InputStream &source)
Parse the input source as an unsigned int into a size_t.
Definition: parser.cpp:453
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::is_digit
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
char.hpp
scanner.hpp
genesis::utils::parse_quoted_string
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:547
genesis::utils::parse_unsigned_integer_naive_
size_t parse_unsigned_integer_naive_(utils::InputStream &source)
Naive parsing that simply loops over chars.
Definition: parser.cpp:401
genesis::utils::InputStream::jump_unchecked
void jump_unchecked(size_t n)
Jump forward in the stream by a certain amount of chars.
Definition: input_stream.cpp:609
genesis::utils::parse_number_string
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:479