A library for working with phylogenetic and population genetic data.
v0.27.0
vcf_record.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2021 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 #ifdef GENESIS_HTSLIB
32 
34 
37 
38 extern "C" {
39  #include <htslib/hts.h>
40  #include <htslib/vcf.h>
41 }
42 
43 #include <cassert>
44 #include <cstdint>
45 #include <cstdlib>
46 #include <cstring>
47 #include <stdexcept>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Typedefs and Enums
54 // =================================================================================================
55 
56 // VariantType
57 static_assert(
58  static_cast<int>( VcfRecord::VariantType::kRef ) == VCF_REF,
59  "Definitions of VCF_REF in htslib and of VariantType::kRef in genesis differ. "
60  "Please submit a bug report at https://github.com/lczech/genesis/issues"
61 );
62 static_assert(
63  static_cast<int>( VcfRecord::VariantType::kSnp ) == VCF_SNP,
64  "Definitions of VCF_SNP in htslib and of VariantType::kSnp in genesis differ. "
65  "Please submit a bug report at https://github.com/lczech/genesis/issues"
66 );
67 static_assert(
68  static_cast<int>( VcfRecord::VariantType::kMnp ) == VCF_MNP,
69  "Definitions of VCF_MNP in htslib and of VariantType::kMnp in genesis differ. "
70  "Please submit a bug report at https://github.com/lczech/genesis/issues"
71 );
72 static_assert(
73  static_cast<int>( VcfRecord::VariantType::kIndel ) == VCF_INDEL,
74  "Definitions of VCF_INDEL in htslib and of VariantType::kIndel in genesis differ. "
75  "Please submit a bug report at https://github.com/lczech/genesis/issues"
76 );
77 static_assert(
78  static_cast<int>( VcfRecord::VariantType::kOther ) == VCF_OTHER,
79  "Definitions of VCF_OTHER in htslib and of VariantType::kOther in genesis differ. "
80  "Please submit a bug report at https://github.com/lczech/genesis/issues"
81 );
82 static_assert(
83  static_cast<int>( VcfRecord::VariantType::kBreakend ) == VCF_BND,
84  "Definitions of VCF_BND in htslib and of VariantType::kBreakend in genesis differ. "
85  "Please submit a bug report at https://github.com/lczech/genesis/issues"
86 );
87 static_assert(
88  static_cast<int>( VcfRecord::VariantType::kOverlap ) == VCF_OVERLAP,
89  "Definitions of VCF_OVERLAP in htslib and of VariantType::kOverlap in genesis differ. "
90  "Please submit a bug report at https://github.com/lczech/genesis/issues"
91 );
92 
93 // =================================================================================================
94 // Constructors and Rule of Five
95 // =================================================================================================
96 
98 {
99  record_ = ::bcf_init();
100  if( ! record_ ) {
101  throw std::runtime_error( "Failed to default-initialize VcfRecord bcf1_t data structure." );
102  }
103 }
104 
106 {
107  header_ = &header;
108  record_ = ::bcf_init();
109  if( ! record_ ) {
110  throw std::runtime_error( "Failed to initialize VcfRecord bcf1_t data structure." );
111  }
112 }
113 
114 VcfRecord::VcfRecord( VcfHeader& header, ::bcf1_t* bcf1 )
115 {
116  header_ = &header;
117  record_ = ::bcf_dup( bcf1 );
118  if( ! record_ ) {
119  throw std::runtime_error( "Failed to copy-initialize VcfRecord bcf1_t data structure." );
120  }
121 }
122 
124 {
125  if( record_ ) {
126  ::bcf_destroy( record_ );
127  }
128  free( info_dest_string_ );
129  free( info_dest_float_ );
130  free( info_dest_int_ );
131 }
132 
134 {
135  // We swap everything, so that once `other` gets destroyed, our current data of `this` gets
136  // also destroyed with it.
137  this->swap( other );
138 }
139 
141 {
142  if( this == &other ) {
143  return *this;
144  }
145  this->swap( other );
146  return *this;
147 }
148 
150 {
151  std::swap( header_, other.header_ );
152  std::swap( record_, other.record_ );
153  std::swap( info_dest_string_, other.info_dest_string_ );
154  std::swap( info_dest_float_, other.info_dest_float_ );
155  std::swap( info_dest_int_, other.info_dest_int_ );
156  std::swap( info_ndest_string_, other.info_ndest_string_ );
157  std::swap( info_ndest_float_, other.info_ndest_float_ );
158  std::swap( info_ndest_int_, other.info_ndest_int_ );
159 }
160 
161 // =================================================================================================
162 // Simple Fixed Columns
163 // =================================================================================================
164 
165 void VcfRecord::unpack() const
166 {
167  ::bcf_unpack( record_, BCF_UN_STR );
168 }
169 
170 std::string VcfRecord::get_chromosome() const
171 {
172  std::string chr = ::bcf_hdr_id2name( header_->data(), record_->rid );
173  if( chr.empty() ) {
174  throw std::runtime_error(
175  "Malformed VCF file: empty chromosome name"
176  );
177  }
178  return chr;
179 }
180 
182 {
183  // This one time, htslib wants to be smart and make the position 0-based. While we appreciate
184  // their effort, in that case, this leads to inconcruencies for users who are not aware of this.
185  // Hence, we "fix" this back to the original number as given in the 1-based VCF/BCF file.
186  assert( record_->pos >= 0 );
187  return record_->pos + 1;
188 }
189 
190 std::string VcfRecord::get_id() const
191 {
192  ::bcf_unpack( record_, BCF_UN_STR );
193  return std::string( record_->d.id );
194 }
195 
196 std::string VcfRecord::at() const
197 {
198  auto const pos_id = std::string( get_id() != "." ? " (" + get_id() + ")" : "" );
199  return get_chromosome() + ":" + std::to_string( get_position() ) + pos_id;
200 }
201 
202 std::string VcfRecord::get_reference() const
203 {
204  // The REF allele is stored as allele[0], and its length is also stored in rlen, in addition to
205  // it being null terminated anyway. Let's all use this, and assert this.
206  ::bcf_unpack( record_, BCF_UN_STR );
207  assert( record_->n_allele > 0 );
208  assert( std::strlen(record_->d.allele[0]) == static_cast<size_t>( record_->rlen ));
209  return std::string( record_->d.allele[0] );
210 }
211 
212 std::vector<std::string> VcfRecord::get_alternatives() const
213 {
214  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
215  ::bcf_unpack( record_, BCF_UN_STR );
216  assert( record_->n_allele > 0 );
217  auto ret = std::vector<std::string>( record_->n_allele - 1 );
218  for( size_t i = 1; i < record_->n_allele; ++i ) {
219  ret[ i - 1 ] = std::string( record_->d.allele[i] );
220  }
221  return ret;
222 }
223 
224 std::string VcfRecord::get_alternative( size_t index ) const
225 {
226  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
227  ::bcf_unpack( record_, BCF_UN_STR );
228  assert( record_->n_allele > 0 );
229  if( index + 1 >= record_->n_allele ) {
230  throw std::invalid_argument(
231  "Cannot retrieve alternative at index " + std::to_string(index) + ", as the record " +
232  "line only has " + std::to_string( record_->n_allele - 1 ) + " alternative alleles."
233  );
234  }
235  assert( index + 1 < record_->n_allele );
236  return record_->d.allele[ index + 1 ];
237 }
238 
240 {
241  // Even if there are no alternatives (that is, set to "."), there has to be at least the REF
242  // allele, which we assert here, so that the re-indexing is ensured to work.
243  ::bcf_unpack( record_, BCF_UN_STR );
244  assert( record_->n_allele > 0 );
245  return record_->n_allele - 1;
246 }
247 
248 std::vector<std::string> VcfRecord::get_variants() const
249 {
250  // Bit of code duplication here, but this is just short enough to not require
251  // an extra function, at least for now.
252  ::bcf_unpack( record_, BCF_UN_STR );
253  auto ret = std::vector<std::string>( record_->n_allele );
254  for( size_t i = 0; i < record_->n_allele; ++i ) {
255  ret[i] = std::string( record_->d.allele[i] );
256  }
257  return ret;
258 }
259 
260 std::string VcfRecord::get_variant( size_t index ) const
261 {
262  ::bcf_unpack( record_, BCF_UN_STR );
263  assert( record_->n_allele > 0 );
264  if( index >= record_->n_allele ) {
265  throw std::invalid_argument(
266  "Cannot retrieve variant at index " + std::to_string(index) + ", as the record " +
267  "line only has " + std::to_string( record_->n_allele ) + " variants (reference + " +
268  "alternative alleles)."
269  );
270  }
271  assert( index < record_->n_allele );
272  return record_->d.allele[ index ];
273 }
274 
276 {
277  ::bcf_unpack( record_, BCF_UN_STR );
278  assert( record_->n_allele > 0 );
279  return record_->n_allele;
280 }
281 
283 {
284  return static_cast<VariantType>( ::bcf_get_variant_types( record_ ));
285 }
286 
288 {
289  // Nope, `bcf_dec_t.n_var` is NOT the number of variants that this record has. It is the
290  // allocated size, which might be bigger if the `bcf1_t` is re-used between records.
291  // So, we have to use `bcf1_t.n_allele` to get to the number of actual valid entries...
292  // Furthermore, `bcf_dec_t.var_type` is not necessarily one type, but the or'ed value
293  // of all types of the variant alleles. Of course. Well documented, htslib!
294  if( alt_index >= record_->n_allele ) {
295  throw std::runtime_error(
296  "Alternative allele index " + std::to_string( alt_index ) +
297  " out of bounds of the number of alleles " + std::to_string( record_->n_allele ) +
298  " of the record."
299  );
300  }
301  return static_cast<VariantType>( ::bcf_get_variant_type( record_, static_cast<int>( alt_index )));
302 }
303 
304 bool VcfRecord::is_snp() const
305 {
306  return ::bcf_is_snp( record_ );
307 }
308 
310 {
311  return record_->qual;
312 }
313 
314 // =================================================================================================
315 // Filter Column
316 // =================================================================================================
317 
318 std::vector<std::string> VcfRecord::get_filter_ids() const
319 {
320  ::bcf_unpack( record_, BCF_UN_FLT );
321  auto ret = std::vector<std::string>();
322  for( size_t i = 0; i < static_cast<size_t>( record_->d.n_flt ); ++i ) {
323  ret.push_back( std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.flt[i] )));
324  }
325  return ret;
326 }
327 
328 bool VcfRecord::has_filter( std::string const& filter ) const
329 {
330  // htslib expects a non-const pointer, as it potentially mutates the string...
331  char* cstr = new char[ filter.length() + 1] ;
332  std::strcpy( cstr, filter.c_str() );
333 
334  // Make the call.
335  int const res = ::bcf_has_filter( header_->data(), record_, cstr );
336 
337  // Clean up and check result. Free the string before the potential exception!
338  delete [] cstr;
339  if( res == -1 ) {
340  throw std::runtime_error( "Filter '" + filter + "' not defined in VCF/BCF header." );
341  }
342  return res;
343 }
344 
346 {
347  // We here take a shortcut to avoid the above string copy in has_filter().
348  // However, we still have to explicitly create the char array, as the htslib function
349  // expects a non-const char*, which does not work by simply passing "PASS" as a string literal.
350  char pass[] = "PASS";
351  return ::bcf_has_filter( header_->data(), record_, pass );
352 }
353 
354 // =================================================================================================
355 // Info Column
356 // =================================================================================================
357 
358 std::vector<std::string> VcfRecord::get_info_ids() const
359 {
360  ::bcf_unpack( record_, BCF_UN_INFO );
361  auto ret = std::vector<std::string>( record_->n_info );
362  for( size_t i = 0; i < static_cast<size_t>( record_->n_info ); ++i ) {
363  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.info[i].key ));
364  }
365  return ret;
366 }
367 
368 bool VcfRecord::has_info( std::string const& id ) const
369 {
370  return has_info( id.c_str() );
371 }
372 
373 bool VcfRecord::has_info( char const* id ) const
374 {
375  return ::bcf_get_info( header_->data(), record_, id ) != nullptr;
376 
377  // The below code seems to return whether the field exists at all in the header... not what we want.
378  // int const id = bcf_hdr_id2int( header_->data(), BCF_DT_ID, id.c_str() );
379  // return bcf_hdr_idinfo_exists( header_->data(), BCF_HL_INFO, id );
380 }
381 
382 void VcfRecord::assert_info( std::string const& id ) const
383 {
384  assert_info( id.c_str() );
385 }
386 
387 void VcfRecord::assert_info( char const* id ) const
388 {
389  if( ! ::bcf_get_info( header_->data(), record_, id )) {
390  throw std::runtime_error(
391  "Required INFO tag " + std::string( id ) + " is not present in the record at " + at()
392  );
393  }
394 }
395 
396 std::string VcfRecord::get_info_string( std::string const& id ) const
397 {
398  std::string result;
399  get_info_string( id, result );
400  return result;
401 }
402 
403 void VcfRecord::get_info_string( std::string const& id, std::string& destination ) const
404 {
405  // Load the result into our buffer that we use to avoid reallocating memory all the time.
406  auto const len = get_info_ptr_(
407  id, BCF_HT_STR, reinterpret_cast<void**>( &info_dest_string_ ), &info_ndest_string_
408  );
409  assert( len >= 0 );
410  assert( info_ndest_string_ >= 0 );
411  assert( info_ndest_string_ >= len );
412 
413  // Copy into our destination string.
414  // destination.clear();
415  // destination.reserve( info_ndest_string_ );
416  // for( int i = 0; i < info_ndest_string_; ++i ) {
417  // if( destination.size() ) {
418  // destination += "***";
419  // }
420  // destination.append( std::string( static_cast<char*>( info_dest_string_ )));
421  // }
422 
423  destination.assign( static_cast<char*>( info_dest_string_ ), len );
424 }
425 
426 std::vector<double> VcfRecord::get_info_float( std::string const& id ) const
427 {
428  std::vector<double> result;
429  get_info_float( id, result );
430  return result;
431 }
432 
433 void VcfRecord::get_info_float( std::string const& id, std::vector<double>& destination ) const
434 {
435  // Load the result into our buffer that we use to avoid reallocating memory all the time.
436  auto const len = get_info_ptr_(
437  id, BCF_HT_REAL, reinterpret_cast<void**>( &info_dest_float_ ), &info_ndest_float_
438  );
439  assert( len >= 0 );
440  assert( info_ndest_float_ >= 0 );
441  assert( info_ndest_float_ >= len );
442 
443  // Copy over into vector.
444  destination.resize( len );
445  for( int i = 0; i < len; ++i ) {
446  destination[i] = static_cast<double>( static_cast<float*>(info_dest_float_)[i] );
447  }
448 }
449 
450 std::vector<int32_t> VcfRecord::get_info_int( std::string const& id ) const
451 {
452  std::vector<int32_t> result;
453  get_info_int( id, result );
454  return result;
455 }
456 
457 void VcfRecord::get_info_int( std::string const& id, std::vector<int32_t>& destination ) const
458 {
459  // Load the result into our buffer that we use to avoid reallocating memory all the time.
460  auto const len = get_info_ptr_(
461  id, BCF_HT_INT, reinterpret_cast<void**>( &info_dest_int_ ), &info_ndest_int_
462  );
463  assert( len >= 0 );
464  assert( info_ndest_int_ >= 0 );
465  assert( info_ndest_int_ >= len );
466 
467  // Copy over into vector.
468  destination.resize( len );
469  for( int i = 0; i < len; ++i ) {
470  destination[i] = static_cast<int32_t*>(info_dest_int_)[i];
471  }
472 }
473 
474 bool VcfRecord::get_info_flag( std::string const& id ) const
475 {
476  // For flags, the destination pointer is not used, and instead the value is immediately returned.
477  return get_info_ptr_( id, BCF_HT_FLAG, nullptr, nullptr );
478 }
479 
480 // =================================================================================================
481 // Format Column
482 // =================================================================================================
483 
484 std::vector<std::string> VcfRecord::get_format_ids() const
485 {
486  ::bcf_unpack( record_, BCF_UN_FMT );
487  auto ret = std::vector<std::string>( record_->n_fmt );
488  for( size_t i = 0; i < static_cast<size_t>( record_->n_fmt ); ++i ) {
489  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.fmt[i].id ));
490  }
491  return ret;
492 }
493 
494 bool VcfRecord::has_format( std::string const& id ) const
495 {
496  return has_format( id.c_str() );
497 
498 }
499 
500 bool VcfRecord::has_format( char const* id ) const
501 {
502  return ::bcf_get_fmt( header_->data(), record_, id ) != nullptr;
503 
504 }
505 
506 void VcfRecord::assert_format( std::string const& id ) const
507 {
508  assert_format( id.c_str() );
509 }
510 
511 void VcfRecord::assert_format( char const* id ) const
512 {
513  if( ! ::bcf_get_fmt( header_->data(), record_, id )) {
514  throw std::runtime_error(
515  "Required FORMAT tag " + std::string( id ) + " is not present in the record at " + at()
516  );
517  }
518 }
519 
520 // =================================================================================================
521 // Sample Columns
522 // =================================================================================================
523 
525 {
526  return VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger );
527 }
528 
530 {
531  return VcfFormatIteratorGenotype();
532 }
533 
535  return {
536  VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger ),
538  };
539 }
540 
542 {
543  return VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString );
544 }
545 
547 {
548  return VcfFormatIteratorString();
549 }
550 
552  std::string const& id
553 ) const {
554  return {
555  VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString ),
557  };
558 }
559 
560 VcfFormatIteratorInt VcfRecord::begin_format_int( std::string const& id ) const
561 {
562  return VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger );
563 }
564 
566 {
567  return VcfFormatIteratorInt();
568 }
569 
571  std::string const& id
572 ) const {
573  return {
574  VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger ),
576  };
577 }
578 
580 {
581  return VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat );
582 }
583 
585 {
586  return VcfFormatIteratorFloat();
587 }
588 
590  std::string const& id
591 ) const {
592  return {
593  VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat ),
595  };
596 }
597 
598 // =================================================================================================
599 // Modifiers
600 // =================================================================================================
601 
603 {
604  bool const good = ( ::bcf_read1( source.data(), header_->data(), record_ ) == 0 );
605  // if( good ) {
606  // ::bcf_unpack( record_ , BCF_UN_ALL );
607  // }
608  return good;
609 }
610 
611 // =================================================================================================
612 // Internal Members
613 // =================================================================================================
614 
615 int VcfRecord::get_info_ptr_( std::string const& id, int ht_type, void** dest, int* ndest) const
616 {
617  // Call the htslib function, and call our function to check the return value, which encodes
618  // for errors as well (if negative). If there was an error, that function call throws
619  // an exception.
620  int const len = ::bcf_get_info_values( header_->data(), record_, id.c_str(), dest, ndest, ht_type );
621  VcfHeader::check_value_return_code_( header_->data(), id, ht_type, BCF_HL_INFO, len );
622 
623  // Assert that if ndest is used (for all but flags), it has a valid value.
624  assert( !ndest || ( *ndest >= 0 && *ndest >= len ));
625  return len;
626 }
627 
628 } // namespace population
629 } // namespace genesis
630 
631 #endif // htslib guard
genesis::placement::swap
void swap(Sample &lhs, Sample &rhs)
Definition: sample.cpp:104
genesis::population::VcfRecord::is_snp
bool is_snp() const
Return whether this variant is a SNP.
Definition: vcf_record.cpp:304
genesis::population::VcfRecord::at
std::string at() const
Return a textual representation of the current record chromosome position.
Definition: vcf_record.cpp:196
genesis::population::VcfRecord::begin_format_float
VcfFormatIteratorFloat begin_format_float(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:579
genesis::population::VcfRecord::get_variant_type
VariantType get_variant_type(size_t alt_index) const
Get the variant type of a particular alternative allele/sequence.
Definition: vcf_record.cpp:287
genesis::population::VcfRecord::header
VcfHeader & header()
Return the VcfHeader instance associated with this record.
Definition: vcf_record.hpp:213
genesis::population::VcfRecord::get_format_ids
std::vector< std::string > get_format_ids() const
Get the list of all format IDs (FORMAT column) that the record contains.
Definition: vcf_record.cpp:484
genesis::population::VcfRecord::assert_info
void assert_info(std::string const &id) const
Assert that an INFO entry with a given id is present in the record.
Definition: vcf_record.cpp:382
genesis::population::VcfRecord::operator=
VcfRecord & operator=(VcfRecord const &)=delete
genesis::population::VcfRecord::VariantType::kSnp
@ kSnp
genesis::population::VcfValueType::kString
@ kString
genesis::population::VcfRecord::begin_format_genotype
VcfFormatIteratorGenotype begin_format_genotype() const
Get the begin iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set ...
Definition: vcf_record.cpp:524
genesis::population::VcfRecord::get_info_string
std::string get_info_string(std::string const &id) const
Return the info value for the given key id as a string.
Definition: vcf_record.cpp:396
genesis::population::VcfValueType::kFloat
@ kFloat
genesis::population::VcfRecord::assert_format
void assert_format(std::string const &id) const
Assert that a FORMAT entry with a given id is present in the record.
Definition: vcf_record.cpp:506
genesis::population::VcfRecord::get_info_flag
bool get_info_flag(std::string const &id) const
Return whether an INFO flag is set, that is, whether the info value for a given key id is present in ...
Definition: vcf_record.cpp:474
genesis::population::VcfRecord::VariantType::kBreakend
@ kBreakend
genesis::population::VcfRecord::get_id
std::string get_id() const
Get the ID string of the variant (ID, third column of the line).
Definition: vcf_record.cpp:190
genesis::population::VcfRecord::begin_format_int
VcfFormatIteratorInt begin_format_int(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:560
genesis::population::VcfRecord::end_format_string
VcfFormatIteratorString end_format_string() const
Get the end iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:546
genesis::population::VcfRecord::get_alternative
std::string get_alternative(size_t index) const
Get a particular alternative allele (ALT, fifth column of the line).
Definition: vcf_record.cpp:224
genesis::population::VcfRecord::get_alternatives
std::vector< std::string > get_alternatives() const
Get the alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:212
genesis::population::VcfRecord::end_format_int
VcfFormatIteratorInt end_format_int() const
Get the end iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:565
vcf_header.hpp
genesis::population::VcfFormatIteratorString
VcfFormatIterator< char *, std::string > VcfFormatIteratorString
Definition: vcf_format_iterator.hpp:65
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genesis::population::VcfRecord::pass_filter
bool pass_filter() const
Return whether the record passes the filters, that is, whether PASS is set.
Definition: vcf_record.cpp:345
genesis::population::VcfRecord::get_variants
std::vector< std::string > get_variants() const
Shortcut to get both the reference (REF, fourth column of the line) and the alternative (ALT,...
Definition: vcf_record.cpp:248
genesis::population::VcfFormatIteratorGenotype
VcfFormatIterator< int32_t, VcfGenotype > VcfFormatIteratorGenotype
Definition: vcf_format_iterator.hpp:68
genesis::population::VcfFormatIterator
Iterate the FORMAT information for the samples in a SNP/variant line in a VCF/BCF file.
Definition: vcf_format_iterator.hpp:62
genesis::population::VcfRecord::swap
void swap(VcfRecord &other)
Definition: vcf_record.cpp:149
genesis::population::VcfRecord::get_info_ids
std::vector< std::string > get_info_ids() const
Get the list of all info IDs (INFO column) that the record contains.
Definition: vcf_record.cpp:358
genesis::population::VcfRecord::read_next
bool read_next(HtsFile &source)
Read the next record/line from the given source, and replace the content of this VcfRecord instance.
Definition: vcf_record.cpp:602
genesis::population::VcfRecord::VariantType::kOther
@ kOther
genesis::population::HtsFile::data
::htsFile * data()
Definition: hts_file.hpp:97
genesis::population::VcfRecord::begin_format_string
VcfFormatIteratorString begin_format_string(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:541
genesis::population::VcfRecord::get_variant_types
VariantType get_variant_types() const
Get the or'ed (union) value of all variant types of the alternative alleles/sequences of the record.
Definition: vcf_record.cpp:282
genesis::population::VcfRecord::VcfRecord
VcfRecord()
Create a default (empty) instance.
Definition: vcf_record.cpp:97
hts_file.hpp
genesis::population::VcfValueType::kInteger
@ kInteger
genesis::population::VcfRecord::get_chromosome
std::string get_chromosome() const
Get the name of a chromosome/contig/sequence (CHROM, first column of the line).
Definition: vcf_record.cpp:170
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis::population::VcfRecord::get_format_genotype
genesis::utils::Range< VcfFormatIteratorGenotype > get_format_genotype() const
Get an iterator pair over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:534
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VcfRecord::get_reference
std::string get_reference() const
Get the reference allele/sequence of the variant (REF, fourth column of the line).
Definition: vcf_record.cpp:202
genesis::population::VcfRecord::has_format
bool has_format(std::string const &id) const
Return whether the record has a given FORMAT id present.
Definition: vcf_record.cpp:494
genesis::population::VcfRecord::get_format_string
genesis::utils::Range< VcfFormatIteratorString > get_format_string(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:551
genesis::population::VcfRecord::VariantType::kMnp
@ kMnp
genesis::population::VcfRecord::has_info
bool has_info(std::string const &id) const
Return whether the record has a given INFO id present.
Definition: vcf_record.cpp:368
genesis::population::VcfRecord::get_format_float
genesis::utils::Range< VcfFormatIteratorFloat > get_format_float(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:589
genesis::population::VcfRecord::get_position
size_t get_position() const
Get the position within the chromosome/contig (POS, second column of the line).
Definition: vcf_record.cpp:181
genesis::population::VcfRecord::end_format_genotype
VcfFormatIteratorGenotype end_format_genotype() const
Get the end iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:529
genesis::population::VcfHeader::data
::bcf_hdr_t * data()
Return the internal htslib ::bcf_hdr_t data struct pointer.
Definition: vcf_header.hpp:177
genesis::population::VcfFormatIteratorInt
VcfFormatIterator< int32_t, int32_t > VcfFormatIteratorInt
Definition: vcf_format_iterator.hpp:66
genesis::population::VcfFormatIteratorFloat
VcfFormatIterator< float, double > VcfFormatIteratorFloat
Definition: vcf_format_iterator.hpp:67
genesis::population::VcfRecord::get_variant_count
size_t get_variant_count() const
Get the total number of variants (REF and ALT alleles) in the record/line.
Definition: vcf_record.cpp:275
genesis::population::VcfRecord::get_filter_ids
std::vector< std::string > get_filter_ids() const
Get the list of all filter values (PASS or the names of the non-passing filters) that are applied to ...
Definition: vcf_record.cpp:318
genesis::population::VcfRecord
Capture the information of a single SNP/variant line in a VCF/BCF file.
Definition: vcf_record.hpp:107
genesis::population::VcfRecord::get_info_int
std::vector< int32_t > get_info_int(std::string const &id) const
Return the info value for the given key id as a vector of int.
Definition: vcf_record.cpp:450
genesis::population::VcfRecord::get_alternatives_count
size_t get_alternatives_count() const
Get the number of alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:239
genesis::population::VcfRecord::end_format_float
VcfFormatIteratorFloat end_format_float() const
Get the end iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:584
genesis::population::VcfRecord::VariantType
VariantType
Types of variants of alleles that can occur in a record.
Definition: vcf_record.hpp:121
genesis::population::VcfRecord::unpack
void unpack() const
Unpack the htslib bcf1_t record data.
Definition: vcf_record.cpp:165
vcf_record.hpp
genesis::population::VcfRecord::get_format_int
genesis::utils::Range< VcfFormatIteratorInt > get_format_int(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:570
genesis::population::VcfRecord::get_info_float
std::vector< double > get_info_float(std::string const &id) const
Return the info value for the given key id as a vector of float/double.
Definition: vcf_record.cpp:426
genesis::population::VcfRecord::VariantType::kIndel
@ kIndel
genesis::population::VcfRecord::get_variant
std::string get_variant(size_t index) const
Get a particular variant (REF or ALT allele).
Definition: vcf_record.cpp:260
genesis::population::VcfHeader
Capture the information from a header of a VCF/BCF file.
Definition: vcf_header.hpp:102
genesis::population::HtsFile
Wrap an ::htsFile struct.
Definition: hts_file.hpp:56
genesis::population::VcfRecord::VariantType::kRef
@ kRef
genesis::population::VcfRecord::VariantType::kOverlap
@ kOverlap
genesis::population::VcfRecord::get_quality
double get_quality() const
Get the quality score (QUAL, sixth column of the line).
Definition: vcf_record.cpp:309
genesis::population::VcfRecord::~VcfRecord
~VcfRecord()
Definition: vcf_record.cpp:123
genesis::population::VcfRecord::has_filter
bool has_filter(std::string const &filter) const
Return whether the record has a given filter set.
Definition: vcf_record.cpp:328