A library for working with phylogenetic and population genetic data.
v0.27.0
sync_reader.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
37 
38 #include <cassert>
39 #include <cstdint>
40 #include <stdexcept>
41 
42 namespace genesis {
43 namespace population {
44 
45 // =================================================================================================
46 // Reading & Parsing
47 // =================================================================================================
48 
53  utils::InputStream const& it,
54  std::string& cur_chr, size_t& cur_pos,
55  Variant const& new_var
56 ) {
57  if(
58  ( new_var.chromosome < cur_chr ) ||
59  ( new_var.chromosome == cur_chr && new_var.position <= cur_pos )
60  ) {
61  throw std::runtime_error(
62  "Malformed pileup " + it.source_name() + " at " + it.at() +
63  ": unordered chromosomes and positions"
64  );
65  }
66  cur_chr = new_var.chromosome;
67  cur_pos = new_var.position;
68 }
69 std::vector<Variant> SyncReader::read(
70  std::shared_ptr< utils::BaseInputSource > source
71 ) const {
72  std::vector<Variant> result;
73  utils::InputStream it( source );
74 
75  // Read, with correct order check, just in case.
76  std::string cur_chr = "";
77  size_t cur_pos = 0;
78  Variant variant;
79  while( parse_line_( it, variant, {}, false )) {
80  process_sync_correct_input_order_( it, cur_chr, cur_pos, variant );
81  result.push_back( std::move( variant ));
82  variant = Variant{};
83  }
84  return result;
85 }
86 
87 std::vector<Variant> SyncReader::read(
88  std::shared_ptr< utils::BaseInputSource > source,
89  std::vector<bool> const& sample_filter
90 ) const {
91  std::vector<Variant> result;
92  utils::InputStream it( source );
93 
94  // Read, with correct order check, just in case.
95  std::string cur_chr = "";
96  size_t cur_pos = 0;
97  Variant variant;
98  while( parse_line_( it, variant, sample_filter, true )) {
99  process_sync_correct_input_order_( it, cur_chr, cur_pos, variant );
100  result.push_back( std::move( variant ));
101  variant = Variant{};
102  }
103  return result;
104 }
105 
// Public single-line parser without sample filter (SyncReader::parse_line according to the
// symbol index of this listing).
// NOTE(review): the line that opens this definition is not part of this listing.
// Forwards to parse_line_() with an empty filter and with filtering switched off, and returns
// whatever parse_line_() returns.
107  utils::InputStream& input_stream,
108  Variant& variant
109 ) const {
110  return parse_line_( input_stream, variant, {}, false );
111 }
112 
// Public single-line parser with sample filter.
// NOTE(review): the line that opens this definition is not part of this listing; presumably
// this is the filtered overload of SyncReader::parse_line() - confirm against the header.
// Forwards to parse_line_() with the given filter and with filtering switched on, and returns
// whatever parse_line_() returns.
114  utils::InputStream& input_stream,
115  Variant& variant,
116  std::vector<bool> const& sample_filter
117 ) const {
118  return parse_line_( input_stream, variant, sample_filter, true );
119 }
120 
121 // =================================================================================================
122 // Internal Parsing
123 // =================================================================================================
124 
125 bool SyncReader::parse_line_(
126  utils::InputStream& input_stream,
127  Variant& variant,
128  std::vector<bool> const& sample_filter,
129  bool use_sample_filter
130 ) const {
131  using namespace genesis::utils;
132 
133  // Shorthand.
134  auto& it = input_stream;
135  if( !it ) {
136  variant = Variant{};
137  return false;
138  }
139 
140  // Read fixed columns for chromosome and position.
141  variant.chromosome = utils::read_until( it, []( char c ){ return c == '\t' || c == '\n'; });
142  if( variant.chromosome.empty() ) {
143  throw std::runtime_error(
144  "Malformed sync " + it.source_name() + " at " + it.at() +
145  ": empty chromosome name"
146  );
147  }
148  it.read_char_or_throw( '\t' );
149  variant.position = it.parse_unsigned_integer<size_t>();
150  if( variant.position == 0 ) {
151  throw std::runtime_error(
152  "Malformed sync " + it.source_name() + " at " + it.at() +
153  ": chromosome position == 0"
154  );
155  }
156  it.read_char_or_throw( '\t' );
157  if( !it || *it == '\n' ) {
158  throw std::runtime_error(
159  std::string("In ") + it.source_name() + ": Unexpected end of line at " + it.at()
160  );
161  }
162 
163  // Read and check fixed column for the refererence base.
164  auto const rb = to_upper( *it );
165  if( rb != 'A' && rb != 'C' && rb != 'G' && rb != 'T' && rb != 'N' && rb != '.' && rb != '*' ) {
166  throw std::runtime_error(
167  std::string("In ") + it.source_name() + ": Invalid reference base char " +
168  char_to_hex(rb) + " at " + it.at()
169  );
170  }
171  variant.reference_base = rb;
172  ++it;
173 
174  // Read the samples. We switch once for the first line, and thereafter check that we read the
175  // same number of samples each time.
176  size_t src_index = 0;
177  if( variant.samples.empty() ) {
178  while( it && *it != '\n' ) {
179  if( !use_sample_filter || ( src_index < sample_filter.size() && sample_filter[src_index] )) {
180  variant.samples.emplace_back();
181  parse_sample_( it, variant.samples.back() );
182  } else {
183  skip_sample_( it );
184  }
185  ++src_index;
186  }
187  } else {
188  // Here we need two indices, one over the samples in the file (source),
189  // and one for the samples that we are writing in our Variant (destination).
190  size_t dst_index = 0;
191  while( it && *it != '\n' ) {
192  // If the numbers do not match, go straight to the error check and throw.
193  if( dst_index >= variant.samples.size() ) {
194  break;
195  }
196 
197  // Parse or skip, depending on filter.
198  if( !use_sample_filter || ( src_index < sample_filter.size() && sample_filter[src_index] )) {
199  assert( dst_index < variant.samples.size() );
200  parse_sample_( it, variant.samples[dst_index] );
201  ++dst_index;
202  } else {
203  skip_sample_( it );
204  }
205  ++src_index;
206  }
207 
208  // Need to have the exact size of samples in the line.
209  if( dst_index != variant.samples.size() ) {
210  throw std::runtime_error(
211  "Malformed sync " + it.source_name() + " at " + it.at() +
212  ": Line with different number of samples."
213  );
214  }
215  }
216  if( use_sample_filter && src_index != sample_filter.size() ) {
217  throw std::runtime_error(
218  "Malformed sync " + it.source_name() + " at " + it.at() +
219  ": Number of samples in the line does not match the number of filter entries."
220  );
221  }
222 
223  // Sync does not have alt bases, so try to get one based on counts.
224  // Excluding the ref base, we use the base of the remaining three that has the highest total
225  // count across all samples, unless all of them are zero, in which case we do not set the
226  // alt base. We also skip cases where the ref is not in ACGT, as then alt is also meaningless.
227  variant.alternative_base = guess_alternative_base( variant, true );
228 
229  assert( !it || *it == '\n' );
230  ++it;
231  return true;
232 }
233 
234 // Only use intrinsics version for the compilers that support them!
235 #if defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
236 
237 void SyncReader::parse_sample_gcc_intrinsic_(
238  utils::InputStream& input_stream,
239  BaseCounts& sample
240 ) const {
241  using namespace genesis::utils;
242  auto& it = input_stream;
243  auto const buff = it.buffer();
244 
245  // We can only run this function if the buffer is guaranteed to contain at least 6 integers
246  // of the largest size that we can process here (8 bytes in bulk, with 7 of them for the digits,
247  // and one for the delimiter). If the buffer is smaller, because we are near
248  // the end of the file, we switch to the slow function instead.
249  // This check is conservative, as in most cases, we won't have numbers that long in the data.
250  // But for those last few entries in a large file, this does not really matter, so let's play
251  // it safe!
252  if( buff.second < 6 * 8 ) {
253  parse_sample_simple_( it, sample );
254  return;
255  }
256 
257  // This function adapts a lot of the ideas from our
258  // InputStream::parse_unsigned_integer_intrinsic_() function. See there for details on the
259  // techniques being used here. We here only provide shortened comments on the bit tricks.
260 
261  // We define a chunk to represent one count number, ACGT and N and D (deletions),
262  // which starts with a chunk that contains 8 bytes from the input stream, which will then be
263  // shortened and processed as needed to only contain the actual digits, and is then finally
264  // turned into the integer representation, step by step.
265  struct Chunk
266  {
267  // The data to process
268  uint64_t data = 0;
269 
270  // How many bytes are actually digits? This is stored as the number of digits plus one.
271  size_t length = 0;
272 
273  // Where in the buffer does this chunk (this sequence of digits) start?
274  size_t offset = 0;
275  };
276 
277  // Function to get a chunk, that is one set of chars representing a number. We here get 8 byte
278  // in bulk, and later check that those contain the delimiter to the next number or the end of
279  // the sample.
280  // If not, we only find out at the end, after having done all the parsing work, and will thus
281  // have wasted timed, but this only ever occurrs in cases with counts >= 10,000,000 (more than
282  // 7 digits, so that there is no delimiter within the 8 bytes), which should be rare in practice,
283  // and in which case we can live with the waste.
284  auto get_chunk_ = []( char const* buffer, size_t offset )
285  {
286  // Prepare a new chunk and store its offset.
287  Chunk chunk;
288  chunk.offset = offset;
289 
290  // Copy 8 bytes into the chunk that we process as one unit.
291  std::memcpy( &chunk.data, buffer + offset, sizeof( chunk.data ));
292 
293  // Helper macro functions to check whether a word has bytes that are less than or greater
294  // than some specified value, and mark these bytes.
295  // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
296  // http://graphics.stanford.edu/~seander/bithacks.html#HasMoreInWord
297  auto const zero = static_cast<uint64_t>(0);
298  #define hasless(x,n) (((x)-~zero/255*(n))&~(x)&~zero/255*128)
299  #define hasmore(x,n) ((((x)+~zero/255*(127-(n)))|(x))&~zero/255*128)
300 
301  // Get all positions that are not digits, by marking a bit in their respective byte.
302  auto const l = hasless( chunk.data, '0' );
303  auto const m = hasmore( chunk.data, '9' );
304  auto const p = l | m;
305 
306  #undef hasless
307  #undef hasmore
308 
309  // Find the index of the first byte that is not a digit.
310  // The length is stored plus one here, due to how __builtin_ffs works. We need this later
311  // to check for the edge case (no delimiter found - the word contains only digits).
312  if( sizeof(int) == sizeof(std::uint64_t) ) {
313  chunk.length = __builtin_ffs(p) / 8;
314  } else if( sizeof(long) == sizeof(std::uint64_t) ) {
315  chunk.length = __builtin_ffsl(p) / 8;
316  } else if( sizeof(long long) == sizeof(std::uint64_t) ) {
317  chunk.length = __builtin_ffsll(p) / 8;
318  } else {
319  static_assert(
320  ( sizeof(int) == sizeof(std::uint64_t) ) ||
321  ( sizeof(long) == sizeof(std::uint64_t) ) ||
322  ( sizeof(long long) == sizeof(std::uint64_t) ),
323  "No compilter intrinsic __builtin_ffs[l][l] for std::uint64_t"
324  );
325  throw std::runtime_error(
326  "No compilter intrinsic __builtin_ffs[l][l] for std::uint64_t"
327  );
328  }
329 
330  return chunk;
331  };
332 
333  // Do the minimal amount of work that is necessary to get all chunks into position.
334  // That is, in this part, one line depends on the output of the previous, as we have to move
335  // forward in the buffer depending on how many digits we found.
336  // By doing the minimal work here (that is, not yet unpacking the chunk data into actual
337  // integers), we can maximize the CPU pipeline parallel part later (that does the unpacking).
338  // We start with offset 1, to skip the inital tab. We check later that there actually is a tab.
339  // The allele frequencies are stored in the order `A:T:C:G:N:del`,
340  // see https://sourceforge.net/p/popoolation2/wiki/Tutorial/
341  auto a_chunk = get_chunk_( buff.first, 1 );
342  auto t_chunk = get_chunk_( buff.first, a_chunk.offset + a_chunk.length );
343  auto c_chunk = get_chunk_( buff.first, t_chunk.offset + t_chunk.length );
344  auto g_chunk = get_chunk_( buff.first, c_chunk.offset + c_chunk.length );
345  auto n_chunk = get_chunk_( buff.first, g_chunk.offset + g_chunk.length );
346  auto d_chunk = get_chunk_( buff.first, n_chunk.offset + n_chunk.length );
347 
348  // This has to follow from the logic of the above.
349  assert( a_chunk.offset == 1 );
350  assert( t_chunk.offset == a_chunk.offset + a_chunk.length );
351  assert( c_chunk.offset == t_chunk.offset + t_chunk.length );
352  assert( g_chunk.offset == c_chunk.offset + c_chunk.length );
353  assert( n_chunk.offset == g_chunk.offset + g_chunk.length );
354  assert( d_chunk.offset == n_chunk.offset + n_chunk.length );
355 
356  // Function to process a chunk, that is, one number that is meant to be a count in the file.
357  // See InputStream::parse_unsigned_integer_intrinsic_() for details.
358  auto process_chunk_ = []( Chunk& chunk )
359  {
360  // We need to move the actual data chars that we want to parse to the left-most
361  // position for the following code to work.
362  chunk.data <<= (8 * ( 8 - chunk.length + 1 ));
363 
364  // 1-byte mask trick (works on 4 pairs of single digits)
365  std::uint64_t lower_digits = (chunk.data & 0x0f000f000f000f00) >> 8;
366  std::uint64_t upper_digits = (chunk.data & 0x000f000f000f000f) * 10;
367  chunk.data = lower_digits + upper_digits;
368 
369  // 2-byte mask trick (works on 2 pairs of two digits)
370  lower_digits = (chunk.data & 0x00ff000000ff0000) >> 16;
371  upper_digits = (chunk.data & 0x000000ff000000ff) * 100;
372  chunk.data = lower_digits + upper_digits;
373 
374  // 4-byte mask trick (works on pair of four digits)
375  lower_digits = (chunk.data & 0x0000ffff00000000) >> 32;
376  upper_digits = (chunk.data & 0x000000000000ffff) * 10000;
377  chunk.data = lower_digits + upper_digits;
378 
379  // Check that we got at least one digit, and at most 7.
380  // When there was no digit at all, the first char is a non-digit, meaning that length == 1.
381  // When there were only digits (all 8 bytes), __builtin_ffs returned length == 0 instead,
382  // as then no delimiter was found at all. Check both cases at once, by length > 1.
383  // Also, we assert that the intrinsic did not return anything too large.
384  assert( chunk.length <= 8 );
385  return ( chunk.length > 1 );
386  };
387 
388  // Now do the bulk processing, using CPU-level pipeline parallelization by offering all
389  // chunks to the CPU at once, with no dependencies between them. Any reasonable compiler will
390  // make use of this fact. This gives ~25% speedup in our tests compared to the already fast
391  // InputStream::parse_unsigned_integer_intrinsic_() function.
392  bool good = true;
393  good &= process_chunk_( a_chunk );
394  good &= process_chunk_( t_chunk );
395  good &= process_chunk_( c_chunk );
396  good &= process_chunk_( g_chunk );
397  good &= process_chunk_( n_chunk );
398  good &= process_chunk_( d_chunk );
399 
400  // We have now processed all chunk data, which now contain the actual numbers.
401  sample.a_count = a_chunk.data;
402  sample.t_count = t_chunk.data;
403  sample.c_count = c_chunk.data;
404  sample.g_count = g_chunk.data;
405  sample.n_count = n_chunk.data;
406  sample.d_count = d_chunk.data;
407 
408  // At the end do the error check, so that we are not wasting cycles to wait for the result
409  // of this check in the standard (non-error) case first. If this fails, no problem, we have
410  // not yet moved in the buffer, so just run the slow version on the same data again,
411  // to get proper parsing (for cases with more than 7 digits) or proper error reporting.
412  // We here check that the sample was delimited by a tab, that all number conversions were good
413  // (that is, they contained at least one digit, and at most 7), and were all delimited by colons.
414  // We also already asserted above that all offsets are at least 1, so that the subtraction
415  // of 1 here works without wrapping around the unsigned int.
416  if(
417  ( *it != '\t' ) ||
418  ! good ||
419  buff.first[ t_chunk.offset - 1 ] != ':' ||
420  buff.first[ c_chunk.offset - 1 ] != ':' ||
421  buff.first[ g_chunk.offset - 1 ] != ':' ||
422  buff.first[ n_chunk.offset - 1 ] != ':' ||
423  buff.first[ d_chunk.offset - 1 ] != ':'
424  ) {
425  // Repeat slowly to throw error at the correct position.
426  parse_sample_simple_( it, sample );
427  return;
428  }
429 
430  // If we are here, we have read a full sample with no error. This means that there were at least
431  // 6 digits, 5 colons, and the inital tab, so 12 chars in total that we jump.
432  assert( d_chunk.offset + d_chunk.length - 1 >= 12 );
433 
434  // Also, just because we can, assert all offsets...
435  assert( a_chunk.offset == 1 );
436  assert( t_chunk.offset >= 3 );
437  assert( c_chunk.offset >= 5 );
438  assert( g_chunk.offset >= 7 );
439  assert( n_chunk.offset >= 9 );
440  assert( d_chunk.offset >= 11 );
441 
442  // ...and lengths. Again, lengths are plus one, due to how __builtin_ffs works.
443  assert( a_chunk.length >= 2 );
444  assert( t_chunk.length >= 2 );
445  assert( c_chunk.length >= 2 );
446  assert( g_chunk.length >= 2 );
447  assert( n_chunk.length >= 2 );
448  assert( d_chunk.length >= 2 );
449 
450  // We can only process data with 7 or fewer digits. Let's assert this.
451  // Potential ways this could fail are, e.g., if somehome we produced random data or max int by
452  // subtracting one from zero, or accessed uninitialized memory, or some other horrible error.
453  assert( sample.a_count < 10000000 );
454  assert( sample.t_count < 10000000 );
455  assert( sample.c_count < 10000000 );
456  assert( sample.g_count < 10000000 );
457  assert( sample.n_count < 10000000 );
458  assert( sample.d_count < 10000000 );
459 
460  // Jump to the position after the last entry.
461  it.jump_unchecked( d_chunk.offset + d_chunk.length - 1 );
462 }
463 
464 #endif // defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
465 
466 void SyncReader::parse_sample_simple_(
467  utils::InputStream& input_stream,
468  BaseCounts& sample
469 ) const {
470  using namespace genesis::utils;
471  auto& it = input_stream;
472  it.read_char_or_throw( '\t' );
473 
474  // The allele frequencies are stored in the order `A:T:C:G:N:del`,
475  // see https://sourceforge.net/p/popoolation2/wiki/Tutorial/
476  sample.a_count = it.parse_unsigned_integer<size_t>();
477  it.read_char_or_throw( ':' );
478  sample.t_count = it.parse_unsigned_integer<size_t>();
479  it.read_char_or_throw( ':' );
480  sample.c_count = it.parse_unsigned_integer<size_t>();
481  it.read_char_or_throw( ':' );
482  sample.g_count = it.parse_unsigned_integer<size_t>();
483  it.read_char_or_throw( ':' );
484  sample.n_count = it.parse_unsigned_integer<size_t>();
485  it.read_char_or_throw( ':' );
486  sample.d_count = it.parse_unsigned_integer<size_t>();
487 }
488 
489 void SyncReader::parse_sample_(
490  utils::InputStream& input_stream,
491  BaseCounts& sample
492 ) const {
493  using namespace genesis::utils;
494  auto& it = input_stream;
495  auto const buff = it.buffer();
496 
497  // We find that almost all entries in real world data are single digits.
498  // Then, an entry has 11 chars: "0:0:6:0:0:0". Use this fact for super-charging the parsing.
499  // We check that all chars are exactly as we expect them. At the end, we only need to check that
500  // at position 12 there is no digit, that is, that the number is done and does not have any more
501  // digits. The check whether that char is valid in the context of the file is then done later
502  // in the next parsing step after finishing this function.
503  if(
504  buff.second >= 12 &&
505  buff.first[ 0 ] == '\t' &&
506  buff.first[ 2 ] == ':' &&
507  buff.first[ 4 ] == ':' &&
508  buff.first[ 6 ] == ':' &&
509  buff.first[ 8 ] == ':' &&
510  buff.first[ 10 ] == ':' &&
511  is_digit( buff.first[ 1 ] ) &&
512  is_digit( buff.first[ 3 ] ) &&
513  is_digit( buff.first[ 5 ] ) &&
514  is_digit( buff.first[ 7 ] ) &&
515  is_digit( buff.first[ 9 ] ) &&
516  is_digit( buff.first[ 11 ] ) &&
517  ! is_digit( buff.first[ 12 ] )
518  ) {
519  // Convert single digits from ASCII to their int value.
520  sample.a_count = buff.first[ 1 ] - '0';
521  sample.t_count = buff.first[ 3 ] - '0';
522  sample.c_count = buff.first[ 5 ] - '0';
523  sample.g_count = buff.first[ 7 ] - '0';
524  sample.n_count = buff.first[ 9 ] - '0';
525  sample.d_count = buff.first[ 11 ] - '0';
526 
527  // Jump to the position after the last entry.
528  it.jump_unchecked( 12 );
529  return;
530  }
531 
532  // If it's not the simply one-digit format, select the fastest alternative algorithm
533  // available for the given compiler.
534  #if defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
535 
536  parse_sample_gcc_intrinsic_( it, sample );
537 
538  #else
539 
540  parse_sample_simple_( it, sample );
541 
542  #endif
543 }
544 
545 void SyncReader::skip_sample_(
546  utils::InputStream& input_stream
547 ) const {
548  using namespace genesis::utils;
549 
550  // The skip functions are slow, because they need char by char access to the input stream.
551  // Need to fix this at some point. For now, just read into an unused dummy.
552  // Not worth bothering with this too much now, as this is really fast anyway.
553  BaseCounts dummy;
554  parse_sample_( input_stream, dummy );
555 
556  // Simply skip everything.
557  // input_stream.read_char_or_throw( '\t' );
558  // skip_while( input_stream, is_digit );
559  // input_stream.read_char_or_throw( ':' );
560  // skip_while( input_stream, is_digit );
561  // input_stream.read_char_or_throw( ':' );
562  // skip_while( input_stream, is_digit );
563  // input_stream.read_char_or_throw( ':' );
564  // skip_while( input_stream, is_digit );
565  // input_stream.read_char_or_throw( ':' );
566  // skip_while( input_stream, is_digit );
567  // input_stream.read_char_or_throw( ':' );
568  // skip_while( input_stream, is_digit );
569 }
570 
571 } // namespace population
572 } // namespace genesis
genesis::utils::InputStream::at
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Definition: input_stream.hpp:481
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:81
parser.hpp
genesis::utils::InputStream::source_name
std::string source_name() const
Get the input source name where this stream reads from.
Definition: input_stream.hpp:522
genesis::population::Variant::position
size_t position
Definition: variant.hpp:65
sync_reader.hpp
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::population::Variant::reference_base
char reference_base
Definition: variant.hpp:66
genesis::population::SyncReader::parse_line
bool parse_line(utils::InputStream &input_stream, Variant &sample_set) const
Definition: sync_reader.cpp:106
genesis::utils::offset
void offset(Histogram &h, double value)
Definition: operations.cpp:47
genesis::utils
Definition: placement/formats/edge_color.hpp:42
genesis::population::SyncReader::read
std::vector< Variant > read(std::shared_ptr< utils::BaseInputSource > source) const
Definition: sync_reader.cpp:69
genesis::utils::to_upper
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
Definition: char.hpp:230
genesis::population::Variant::samples
std::vector< BaseCounts > samples
Definition: variant.hpp:69
genesis::utils::read_until
std::string read_until(InputStream &source, char criterion)
Lexing function that reads from the stream until its current char equals the provided one....
Definition: scanner.hpp:254
genesis::population::Variant
A single variant at a position in a chromosome, along with BaseCounts for a set of samples.
Definition: variant.hpp:62
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::is_digit
constexpr bool is_digit(char c) noexcept
Return whether a char is a digit (0-9), ASCII-only.
Definition: char.hpp:95
genesis::population::guess_alternative_base
char guess_alternative_base(Variant const &variant, bool force)
Guess the alternative base of a Variant.
Definition: population/functions/functions.cpp:463
char.hpp
genesis::population::process_sync_correct_input_order_
void process_sync_correct_input_order_(utils::InputStream const &it, std::string &cur_chr, size_t &cur_pos, Variant const &new_var)
Local helper function to remove code duplication for the correct input order check.
Definition: sync_reader.cpp:52
genesis::utils::char_to_hex
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Definition: char.cpp:118
scanner.hpp
genesis::population::Variant::alternative_base
char alternative_base
Definition: variant.hpp:67
functions.hpp
genesis::population::Variant::chromosome
std::string chromosome
Definition: variant.hpp:64