A library for working with phylogenetic and population genetic data.
v0.27.0
vcf_input_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_VCF_INPUT_ITERATOR_H_
2 #define GENESIS_POPULATION_FORMATS_VCF_INPUT_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #ifdef GENESIS_HTSLIB
35 
39 
40 #include <cassert>
41 #include <memory>
42 #include <stdexcept>
43 #include <string>
44 #include <utility>
45 #include <vector>
46 
47 namespace genesis {
48 namespace population {
49 
50 // =================================================================================================
51 // VCF/BCF Input Iterator
52 // =================================================================================================
53 
77 {
78 public:
79 
80  // -------------------------------------------------------------------------
81  // Member Types
82  // -------------------------------------------------------------------------
83 
// Iterator trait aliases. They build on `value_type`, whose declaration (like the one of
// `self_type`, used further below) is on a line that is not part of this listing; the symbol
// index at the end of the file gives `self_type` as `VcfInputIterator`.
// Both `pointer` and `reference` are const: the aliases only expose read access.
86  using pointer = value_type const*;
87  using reference = value_type const&;
88  using difference_type = std::ptrdiff_t;
// The class advertises itself with the standard input iterator tag.
89  using iterator_category = std::input_iterator_tag;
90 
91  // -------------------------------------------------------------------------
92  // Constructors and Rule of Five
93  // -------------------------------------------------------------------------
94 
/**
 * @brief Create a default instance, with no input.
 *
 * All three shared pointers (file, header, record) stay empty, so the bool conversion and
 * good() yield false, and the asserting accessors must not be called on such an instance.
 * A default constructed instance also serves as the past-the-end marker in comparisons.
 */
98  VcfInputIterator() = default;
99 
// NOTE(review): the line that opens this declaration (listing line 107) is missing from
// this listing. The symbol index at the end of the file gives the signature as
// `VcfInputIterator(std::string const &filename, bool expect_ordered=true)`.
//
// Create an instance that reads from an input file name, without restricting the samples:
// an empty list of sample names and `false` for the inversion flag are forwarded to the
// four-parameter constructor, which skips its sample filtering when the list is empty.
108  std::string const& filename,
109  bool expect_ordered = true
110  )
111  // Call the other constructor, to avoid code duplication.
112  : VcfInputIterator( filename, std::vector<std::string>{}, false, expect_ordered )
113  {}
114 
// NOTE(review): the line that opens this declaration (listing line 124) is missing from
// this listing. The symbol index at the end of the file gives the signature as
// `VcfInputIterator(std::string const &filename, std::vector< std::string > const &sample_names,
// bool inverse_sample_names=false, bool expect_ordered=true)`.
//
// Create an instance that reads from an input file name.
//  - filename:             stored, and used to open the file.
//  - sample_names:         if non-empty, handed to VcfHeader::set_samples() to filter the
//                          sample columns by name; if empty, no filtering is requested.
//  - inverse_sample_names: passed through to set_samples() unchanged; presumably inverts
//                          the selection - confirm in vcf_header.hpp.
//  - expect_ordered:       stored in expect_ordered_; it is not read anywhere in this file,
//                          presumably increment_() uses it - confirm in its definition.
125  std::string const& filename,
126  std::vector<std::string> const& sample_names,
127  bool inverse_sample_names = false,
128  bool expect_ordered = true
129  )
130  : filename_( filename )
131  , expect_ordered_( expect_ordered )
// The order of the next three initializers matters: the header is built from the file, and
// the record from the header. It matches the declaration order of the data members.
132  , file_( std::make_shared<HtsFile>( filename ))
133  , header_( std::make_shared<VcfHeader>( *file_ ))
134  , record_( std::make_shared<VcfRecord>( *header_ ))
135  {
136  // Filter sample columns by their name.
137  if( ! sample_names.empty() ) {
138  header_->set_samples( sample_names, inverse_sample_names );
139  }
140
141  // Read the first record of the file.
142  increment_();
143  }
144 
// Rule of Five, everything defaulted. As the htslib related members are held via shared
// pointers, a copy refers to the very same file, header, and record objects as its source,
// and hence compares equal to it.
145  ~VcfInputIterator() = default;
146
// Copy and move construction.
147  VcfInputIterator( self_type const& ) = default;
148  VcfInputIterator( self_type&& ) = default;
149
// Copy and move assignment.
150  self_type& operator= ( self_type const& ) = default;
151  self_type& operator= ( self_type&& ) = default;
152 
153  // -------------------------------------------------------------------------
154  // Comparators
155  // -------------------------------------------------------------------------
156 
/**
 * @brief Tell whether this iterator currently has a file attached.
 *
 * Same result as good(). The conversion is explicit, so it applies in boolean contexts
 * such as conditions, but not in implicit conversions to other types.
 */
160  explicit operator bool() const
161  {
162  return good();
163  }
164 
/**
 * @brief Tell whether this iterator currently has a file attached.
 *
 * The result is true exactly if the internal file pointer is not empty. It is false for a
 * default constructed instance.
 */
165  bool good() const
166  {
167  return file_ != nullptr;
168  }
169 
170  // -------------------------------------------------------------------------
171  // Accessors
172  // -------------------------------------------------------------------------
173 
/**
 * @brief Get the file name that was given on construction.
 *
 * For a default constructed instance, this is an empty string.
 */
174  std::string const& filename() const
175  {
176  return this->filename_;
177  }
178 
/**
 * @brief Read-only access to the HtsFile that this iterator reads from.
 *
 * Must not be called on a default constructed instance, see the assertion in the body.
 */
179  HtsFile const& hts_file() const
180  {
181  // A default constructed instance does not hold any pointed-to objects, so that this
182  // accessor and the ones below are not valid to call on it. Each of them hence asserts
183  // first that its shared pointer is set: in builds with assertions enabled, a misuse
184  // then stops at a clearly named check, rather than ending in a seg fault caused by
185  // dereferencing a nullptr.
186  assert( file_ );
187  return *( file_.get() );
188  }
189 
// NOTE(review): the declaration line of this body (listing line 190) is missing from this
// listing. The symbol index at the end of the file names it as `HtsFile & hts_file()`,
// that is, the non-const counterpart of the accessor above.
191  {
// Calling this on a default constructed instance is invalid; catch that via assertion.
192  assert( file_ );
193  return *file_;
194  }
195 
/**
 * @brief Read-only access to the VcfHeader that was created from the input file.
 *
 * Must not be called on a default constructed instance, which has no header; the
 * assertion checks for this.
 */
196  VcfHeader const& header() const
197  {
198  assert( header_ );
199  return *( header_.get() );
200  }
201 
// NOTE(review): the declaration line of this body (listing line 202) is missing from this
// listing. The symbol index at the end of the file names it as `VcfHeader & header()`,
// that is, the non-const counterpart of the accessor above.
203  {
// Calling this on a default constructed instance is invalid; catch that via assertion.
204  assert( header_ );
205  return *header_;
206  }
207 
/**
 * @brief Read-only access to the VcfRecord that this iterator currently holds.
 *
 * Must not be called on a default constructed instance, which has no record; the
 * assertion checks for this.
 */
208  VcfRecord const& record() const
209  {
210  assert( record_ );
211  return *( record_.get() );
212  }
213 
// NOTE(review): the declaration line of this body (listing line 214) is missing from this
// listing. The symbol index at the end of the file names it as `VcfRecord & record()`,
// that is, the non-const counterpart of the accessor above.
215  {
// Calling this on a default constructed instance is invalid; catch that via assertion.
216  assert( record_ );
217  return *record_;
218  }
219 
/**
 * @brief Member access to the VcfRecord that this iterator currently holds (read-only).
 *
 * Yields the address of the record owned by the internal shared pointer. Must not be
 * called on a default constructed instance; the assertion checks for this.
 */
220  VcfRecord const* operator ->() const
221  {
222  assert( record_ );
223  return record_.get();
224  }
225 
// NOTE(review): the declaration line of this body (listing line 226) is missing from this
// listing, and the symbol index at the end of the file has no entry for that line either.
// Judging from its position and its body, this is presumably the non-const counterpart of
// the `operator->` above - confirm against the original header.
227  {
// Calling this on a default constructed instance is invalid; catch that via assertion.
228  assert( record_ );
// Address of the record object that the shared pointer refers to.
229  return &*record_;
230  }
231 
/**
 * @brief Dereference to the VcfRecord that this iterator currently holds (read-only).
 *
 * Must not be called on a default constructed instance, which has no record; the
 * assertion checks for this.
 */
232  VcfRecord const& operator*() const
233  {
234  assert( record_ );
235  return *( record_.get() );
236  }
237 
// NOTE(review): the declaration line of this body (listing line 238) is missing from this
// listing. The symbol index at the end of the file names it as `VcfRecord & operator*()`,
// that is, the non-const counterpart of the dereference operator above.
239  {
// Calling this on a default constructed instance is invalid; catch that via assertion.
240  assert( record_ );
241  return *record_;
242  }
243 
244  // -------------------------------------------------------------------------
245  // Iteration
246  // -------------------------------------------------------------------------
247 
// NOTE(review): the declaration line of this body (listing line 248) is missing from this
// listing. The symbol index at the end of the file names it as `self_type & operator++()`,
// that is, the pre-increment operator.
249  {
// Move on via the private helper, then hand back this very iterator.
250  increment_();
251  return *this;
252  }
253 
// NOTE(review): the declaration line of this body (listing line 254) is missing from this
// listing, and the symbol index at the end of the file has no entry for that line either.
// Following directly after the pre-increment operator with the same body, this is presumably
// the post-increment `operator++(int)` - confirm against the original header. If so, note
// that it returns the iterator after moving on, and not a copy of the previous state.
255  {
// Move on via the private helper, then hand back this very iterator.
256  increment_();
257  return *this;
258  }
259 
/**
 * @brief Compare two iterators for equality, based on the file object they refer to.
 */
260  bool operator==( self_type const& other ) const
261  {
262  // Two iterators are equal if they refer to the same file object, which is the case for
263  // copies of each other. Separately constructed iterators with a file are never equal. A
264  // default constructed one (the past-the-end marker) has no file, and compares as such.
265  return file_.get() == other.file_.get();
266  }
267 
/**
 * @brief Compare two iterators for inequality; the exact negation of `operator==`.
 */
268  bool operator!=( self_type const& other ) const
269  {
270  return ! operator==( other );
271  }
272 
273  // -------------------------------------------------------------------------
274  // Private Members
275  // -------------------------------------------------------------------------
276 
277 private:
278 
// Move on to the next record. This is only the declaration; the definition is not part of
// this file. Within this file, the function is called once by the file-reading constructor
// in order to read the first record, and by the bodies of the increment operators.
279  void increment_();
280 
281  // -------------------------------------------------------------------------
282  // Data Members
283  // -------------------------------------------------------------------------
284 
285 private:
286 
// Name of the input file as given to the constructor; empty if default constructed.
287  std::string filename_;
// Copy of the `expect_ordered` constructor argument. Not read anywhere within this file;
// presumably evaluated by increment_() - confirm in its definition.
288  bool expect_ordered_ = true;
289
290  // htslib structs. We use shared pointers here to allow copying this iterator.
// The declaration order is relied upon by the constructor: the header is created from the
// file, and the record from the header. All three are empty if default constructed.
291  std::shared_ptr<HtsFile> file_;
292  std::shared_ptr<VcfHeader> header_;
293  std::shared_ptr<VcfRecord> record_;
294 
295 };
296 
297 } // namespace population
298 } // namespace genesis
299 
300 #endif // htslib guard
301 #endif // include guard
genesis::population::VcfInputIterator::filename
std::string const & filename() const
Definition: vcf_input_iterator.hpp:174
genesis::population::VcfInputIterator::difference_type
std::ptrdiff_t difference_type
Definition: vcf_input_iterator.hpp:88
genesis::population::VcfInputIterator::self_type
VcfInputIterator self_type
Definition: vcf_input_iterator.hpp:84
genesis::population::VcfInputIterator::record
VcfRecord & record()
Definition: vcf_input_iterator.hpp:214
genesis::population::VcfInputIterator::header
VcfHeader & header()
Definition: vcf_input_iterator.hpp:202
genesis::population::VcfInputIterator::operator->
VcfRecord const * operator->() const
Definition: vcf_input_iterator.hpp:220
genesis::population::VcfInputIterator::hts_file
HtsFile & hts_file()
Definition: vcf_input_iterator.hpp:190
genesis::population::VcfInputIterator::reference
value_type const & reference
Definition: vcf_input_iterator.hpp:87
genesis::population::VcfInputIterator::good
bool good() const
Definition: vcf_input_iterator.hpp:165
genesis::population::VcfInputIterator::VcfInputIterator
VcfInputIterator()=default
Create a default instance, with no input. This is also the past-the-end iterator.
vcf_header.hpp
genesis::population::VcfInputIterator::VcfInputIterator
VcfInputIterator(std::string const &filename, std::vector< std::string > const &sample_names, bool inverse_sample_names=false, bool expect_ordered=true)
Create an instance that reads from an input file name.
Definition: vcf_input_iterator.hpp:124
genesis::population::VcfInputIterator::hts_file
HtsFile const & hts_file() const
Definition: vcf_input_iterator.hpp:179
hts_file.hpp
genesis::population::VcfInputIterator::pointer
value_type const * pointer
Definition: vcf_input_iterator.hpp:86
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VcfInputIterator::~VcfInputIterator
~VcfInputIterator()=default
genesis::population::VcfInputIterator::operator*
VcfRecord & operator*()
Definition: vcf_input_iterator.hpp:238
genesis::population::VcfInputIterator::operator*
VcfRecord const & operator*() const
Definition: vcf_input_iterator.hpp:232
genesis::population::VcfInputIterator::operator==
bool operator==(self_type const &other) const
Definition: vcf_input_iterator.hpp:260
genesis::population::VcfInputIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: vcf_input_iterator.hpp:89
genesis::population::VcfInputIterator::operator=
self_type & operator=(self_type const &)=default
genesis::population::VcfRecord
Capture the information of a single SNP/variant line in a VCF/BCF file.
Definition: vcf_record.hpp:107
genesis::population::VcfInputIterator::VcfInputIterator
VcfInputIterator(std::string const &filename, bool expect_ordered=true)
Create an instance that reads from an input file name.
Definition: vcf_input_iterator.hpp:107
genesis::population::VcfInputIterator::header
VcfHeader const & header() const
Definition: vcf_input_iterator.hpp:196
vcf_record.hpp
genesis::population::VcfInputIterator::operator!=
bool operator!=(self_type const &other) const
Definition: vcf_input_iterator.hpp:268
genesis::population::VcfHeader
Capture the information from a header of a VCF/BCF file.
Definition: vcf_header.hpp:102
genesis::population::HtsFile
Wrap an ::htsFile struct.
Definition: hts_file.hpp:56
genesis::population::VcfInputIterator::record
VcfRecord const & record() const
Definition: vcf_input_iterator.hpp:208
genesis::population::VcfInputIterator::operator++
self_type & operator++()
Definition: vcf_input_iterator.hpp:248
genesis::population::VcfInputIterator
Iterate an input source and parse it as a VCF/BCF file.
Definition: vcf_input_iterator.hpp:76