A library for working with phylogenetic and population genetic data.
v0.32.0
vcf_input_stream.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 #ifdef GENESIS_HTSLIB
32 
34 
35 extern "C" {
36  #include <htslib/hts.h>
37  #include <htslib/vcf.h>
38 }
39 
40 #include <cstring>
41 
42 namespace genesis {
43 namespace population {
44 
45 // =================================================================================================
46 // Private Members
47 // =================================================================================================
48 
49 void VcfInputStream::increment_()
50 {
51  // We need this function in a source file instead of the header,
52  // solely to have the htslib functions not spam our namespace...
53  // We could also work with data directly from our VcfRecord here instead,
54  // but this would entail string copies, instead of just pointer copies;
55  // sometimes, plain C has its advantages ;-)
56  // (or we'd switch to C++17 with stringview instead, which would also solve the issue)
57 
58  assert( file_ );
59  assert( record_ );
60 
61  // If needed, we check the correct order of chromosomes and positions in the input file.
62  // To avoid making expensive temporary strings for this,
63  // we simply use char pointers to the htslib record data directly here.
64  char const* cur_chr = nullptr;
65  size_t cur_pos = 0;
66  if( expect_ordered_ ) {
67  cur_chr = ::bcf_hdr_id2name( record_->header().data(), record_->data()->rid );
68  cur_pos = record_->data()->pos + 1;
69  }
70 
71  // Read the next record. If this returns false, we are done.
72  if( ! record_->read_next( *file_ )) {
73  file_ = nullptr;
74  return;
75  }
76 
77  // Do the correct order check if needed.
78  if( expect_ordered_ ) {
79  assert( cur_chr != nullptr );
80  assert( cur_pos > 0 );
81 
82  // Get the new chr/pos.
83  char const* new_chr = ::bcf_hdr_id2name( record_->header().data(), record_->data()->rid );
84  size_t new_pos = record_->data()->pos + 1;
85 
86  // Check!
87  if(
88  ( strcmp( new_chr, cur_chr ) < 0 ) ||
89  ( strcmp( new_chr, cur_chr ) == 0 && new_pos <= cur_pos )
90  ) {
91  throw std::runtime_error(
92  "Malformed VCF file " + filename_ + ": " +
93  "unordered chromosomes and positions going from " +
94  std::string( cur_chr ) + ":" + std::to_string( cur_pos ) + " to " +
95  std::string( new_chr ) + ":" + std::to_string( new_pos )
96  );
97  }
98  }
99 }
100 
101 } // namespace population
102 } // namespace genesis
103 
104 #endif // htslib guard
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
vcf_input_stream.hpp