A library for working with phylogenetic and population genetic data.
v0.32.0
vcf_record.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 #ifdef GENESIS_HTSLIB
32 
34 
37 
38 extern "C" {
39  #include <htslib/hts.h>
40  #include <htslib/vcf.h>
41 }
42 
43 #include <cassert>
44 #include <cstdint>
45 #include <cstdlib>
46 #include <cstring>
47 #include <stdexcept>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Typedefs and Enums
54 // =================================================================================================
55 
56 // VariantType
// Compile-time guard: every VcfRecord::VariantType enumerator has to have the same integer value
// as the corresponding htslib VCF_* constant. This file converts the values returned by
// ::bcf_get_variant_types() and ::bcf_get_variant_type() to VariantType via a plain static_cast,
// which is only correct as long as both definitions agree.
57 static_assert(
58  static_cast<int>( VcfRecord::VariantType::kRef ) == VCF_REF,
59  "Definitions of VCF_REF in htslib and of VariantType::kRef in genesis differ. "
60  "Please submit a bug report at https://github.com/lczech/genesis/issues"
61 );
62 static_assert(
63  static_cast<int>( VcfRecord::VariantType::kSnp ) == VCF_SNP,
64  "Definitions of VCF_SNP in htslib and of VariantType::kSnp in genesis differ. "
65  "Please submit a bug report at https://github.com/lczech/genesis/issues"
66 );
67 static_assert(
68  static_cast<int>( VcfRecord::VariantType::kMnp ) == VCF_MNP,
69  "Definitions of VCF_MNP in htslib and of VariantType::kMnp in genesis differ. "
70  "Please submit a bug report at https://github.com/lczech/genesis/issues"
71 );
72 static_assert(
73  static_cast<int>( VcfRecord::VariantType::kIndel ) == VCF_INDEL,
74  "Definitions of VCF_INDEL in htslib and of VariantType::kIndel in genesis differ. "
75  "Please submit a bug report at https://github.com/lczech/genesis/issues"
76 );
77 static_assert(
78  static_cast<int>( VcfRecord::VariantType::kOther ) == VCF_OTHER,
79  "Definitions of VCF_OTHER in htslib and of VariantType::kOther in genesis differ. "
80  "Please submit a bug report at https://github.com/lczech/genesis/issues"
81 );
82 static_assert(
83  static_cast<int>( VcfRecord::VariantType::kBreakend ) == VCF_BND,
84  "Definitions of VCF_BND in htslib and of VariantType::kBreakend in genesis differ. "
85  "Please submit a bug report at https://github.com/lczech/genesis/issues"
86 );
87 static_assert(
88  static_cast<int>( VcfRecord::VariantType::kOverlap ) == VCF_OVERLAP,
89  "Definitions of VCF_OVERLAP in htslib and of VariantType::kOverlap in genesis differ. "
90  "Please submit a bug report at https://github.com/lczech/genesis/issues"
91 );
92 
93 // =================================================================================================
94 // Constructors and Rule of Five
95 // =================================================================================================
96 
98 {
99  record_ = ::bcf_init();
100  if( ! record_ ) {
101  throw std::runtime_error( "Failed to default-initialize VcfRecord bcf1_t data structure." );
102  }
103 }
104 
106 {
107  header_ = &header;
108  record_ = ::bcf_init();
109  if( ! record_ ) {
110  throw std::runtime_error( "Failed to initialize VcfRecord bcf1_t data structure." );
111  }
112 }
113 
114 VcfRecord::VcfRecord( VcfHeader& header, ::bcf1_t* bcf1 )
115 {
116  header_ = &header;
117  record_ = ::bcf_dup( bcf1 );
118  if( ! record_ ) {
119  throw std::runtime_error( "Failed to copy-initialize VcfRecord bcf1_t data structure." );
120  }
121 }
122 
124 {
125  if( record_ ) {
126  ::bcf_destroy( record_ );
127  }
128  free( info_dest_string_ );
129  free( info_dest_float_ );
130  free( info_dest_int_ );
131 }
132 
134 {
135  // We swap everything, so that once `other` gets destroyed, our current data of `this` gets
136  // also destroyed with it.
137  this->swap( other );
138 }
139 
141 {
142  if( this == &other ) {
143  return *this;
144  }
145  this->swap( other );
146  return *this;
147 }
148 
150 {
151  std::swap( header_, other.header_ );
152  std::swap( record_, other.record_ );
153  std::swap( info_dest_string_, other.info_dest_string_ );
154  std::swap( info_dest_float_, other.info_dest_float_ );
155  std::swap( info_dest_int_, other.info_dest_int_ );
156  std::swap( info_ndest_string_, other.info_ndest_string_ );
157  std::swap( info_ndest_float_, other.info_ndest_float_ );
158  std::swap( info_ndest_int_, other.info_ndest_int_ );
159 }
160 
161 // =================================================================================================
162 // Simple Fixed Columns
163 // =================================================================================================
164 
165 void VcfRecord::unpack() const
166 {
167  ::bcf_unpack( record_, BCF_UN_STR );
168 }
169 
170 std::string VcfRecord::get_chromosome() const
171 {
172  std::string chr = ::bcf_hdr_id2name( header_->data(), record_->rid );
173  if( chr.empty() ) {
174  throw std::runtime_error(
175  "Malformed VCF file: empty chromosome name"
176  );
177  }
178  return chr;
179 }
180 
182 {
183  // This one time, htslib wants to be smart and make the position 0-based. While we appreciate
184  // their effort, in that case, this leads to incongruencies for users who are not aware of this.
185  // Hence, we "fix" this back to the original number as given in the 1-based VCF/BCF file.
186  assert( record_->pos >= 0 );
187  return record_->pos + 1;
188 }
189 
190 std::string VcfRecord::get_id() const
191 {
192  ::bcf_unpack( record_, BCF_UN_STR );
193  return std::string( record_->d.id );
194 }
195 
196 std::string VcfRecord::at() const
197 {
198  auto const pos_id = std::string( get_id() != "." ? " (" + get_id() + ")" : "" );
199  return get_chromosome() + ":" + std::to_string( get_position() ) + pos_id;
200 }
201 
202 std::string VcfRecord::get_reference() const
203 {
204  // The REF allele is stored as allele[0], and its length is also stored in rlen, in addition to
205  // it being null terminated anyway. Let's all use this, and assert this.
206  ::bcf_unpack( record_, BCF_UN_STR );
207  assert( record_->n_allele > 0 );
208  assert( std::strlen(record_->d.allele[0]) == static_cast<size_t>( record_->rlen ));
209  return std::string( record_->d.allele[0] );
210 }
211 
212 std::vector<std::string> VcfRecord::get_alternatives() const
213 {
214  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
215  ::bcf_unpack( record_, BCF_UN_STR );
216  assert( record_->n_allele > 0 );
217  auto ret = std::vector<std::string>( record_->n_allele - 1 );
218  for( size_t i = 1; i < record_->n_allele; ++i ) {
219  ret[ i - 1 ] = std::string( record_->d.allele[i] );
220  }
221  return ret;
222 }
223 
224 std::string VcfRecord::get_alternative( size_t index ) const
225 {
226  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
227  ::bcf_unpack( record_, BCF_UN_STR );
228  assert( record_->n_allele > 0 );
229  if( index + 1 >= record_->n_allele ) {
230  throw std::invalid_argument(
231  "Cannot retrieve alternative at index " + std::to_string(index) + ", as the record " +
232  "line only has " + std::to_string( record_->n_allele - 1 ) + " alternative alleles."
233  );
234  }
235  assert( index + 1 < record_->n_allele );
236  return record_->d.allele[ index + 1 ];
237 }
238 
240 {
241  // Even if there are no alternatives (that is, set to "."), there has to be at least the REF
242  // allele, which we assert here, so that the re-indexing is ensured to work.
243  ::bcf_unpack( record_, BCF_UN_STR );
244  assert( record_->n_allele > 0 );
245  return record_->n_allele - 1;
246 }
247 
248 std::vector<std::string> VcfRecord::get_variants() const
249 {
250  // Bit of code duplication here, but this is just short enough to not require
251  // an extra function, at least for now.
252  ::bcf_unpack( record_, BCF_UN_STR );
253  auto ret = std::vector<std::string>( record_->n_allele );
254  for( size_t i = 0; i < record_->n_allele; ++i ) {
255  ret[i] = std::string( record_->d.allele[i] );
256  }
257  return ret;
258 }
259 
260 std::string VcfRecord::get_variant( size_t index ) const
261 {
262  ::bcf_unpack( record_, BCF_UN_STR );
263  assert( record_->n_allele > 0 );
264  if( index >= record_->n_allele ) {
265  throw std::invalid_argument(
266  "Cannot retrieve variant at index " + std::to_string(index) + ", as the record " +
267  "line only has " + std::to_string( record_->n_allele ) + " variants (reference + " +
268  "alternative alleles)."
269  );
270  }
271  assert( index < record_->n_allele );
272  return record_->d.allele[ index ];
273 }
274 
276 {
277  ::bcf_unpack( record_, BCF_UN_STR );
278  assert( record_->n_allele > 0 );
279  return record_->n_allele;
280 }
281 
283 {
284  return static_cast<VariantType>( ::bcf_get_variant_types( record_ ));
285 }
286 
288 {
289  // Nope, `bcf_dec_t.n_var` is NOT the number of variants that this record has. It is the
290  // allocated size, which might be bigger if the `bcf1_t` is re-used between records.
291  // So, we have to use `bcf1_t.n_allele` to get to the number of actual valid entries...
292  // Furthermore, `bcf_dec_t.var_type` is not necessarily one type, but the or'ed value
293  // of all types of the variant alleles. Of course. Well documented, htslib!
294  if( alt_index >= record_->n_allele ) {
295  throw std::runtime_error(
296  "Alternative allele index " + std::to_string( alt_index ) +
297  " out of bounds of the number of alleles " + std::to_string( record_->n_allele ) +
298  " of the record."
299  );
300  }
301  return static_cast<VariantType>( ::bcf_get_variant_type( record_, static_cast<int>( alt_index )));
302 }
303 
304 bool VcfRecord::is_snp() const
305 {
306  return ::bcf_is_snp( record_ );
307 }
308 
310 {
311  // We here need a special case of the ::bcf_is_snp() function, so we base our code on theirs,
312  // but rewrite it to fit our needs. They do a weird loop, which we have simplified here a bit.
313  // We do not need to work with the mpileup <X> format that they check for - if we have that,
314  // then we are definitely returning false here anyway.
315 
316  bcf_unpack( record_, BCF_UN_STR );
317  for( size_t i = 0; i < record_->n_allele; ++i) {
318  // We return false for any non-single-char allele, for monomorphic reference ('.'),
319  // and if the ref allele is a deletion. The monomorphic reference should actually
320  // not occur in practice, as the n_allele count would just not have an entry for it
321  // anyway. Still, seems to be better to check, just in case.
322  if( record_->d.allele[i][1] != 0 ) {
323  return false;
324  }
325  if( record_->d.allele[i][0] == '.' ) {
326  return false;
327  }
328  if( record_->d.allele[i][0] == '*' && i == 0 ) {
329  return false;
330  }
331  }
332  return true;
333 }
334 
336 {
337  return record_->qual;
338 }
339 
340 // =================================================================================================
341 // Filter Column
342 // =================================================================================================
343 
344 std::vector<std::string> VcfRecord::get_filter_ids() const
345 {
346  ::bcf_unpack( record_, BCF_UN_FLT );
347  auto ret = std::vector<std::string>();
348  for( size_t i = 0; i < static_cast<size_t>( record_->d.n_flt ); ++i ) {
349  ret.push_back( std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.flt[i] )));
350  }
351  return ret;
352 }
353 
354 bool VcfRecord::has_filter( std::string const& filter ) const
355 {
356  // htslib expects a non-const pointer, as it potentially mutates the string...
357  char* cstr = new char[ filter.length() + 1] ;
358  std::strcpy( cstr, filter.c_str() );
359 
360  // Make the call.
361  int const res = ::bcf_has_filter( header_->data(), record_, cstr );
362 
363  // Clean up and check result. Free the string before the potential exception!
364  delete [] cstr;
365  if( res == -1 ) {
366  throw std::runtime_error( "Filter '" + filter + "' not defined in VCF/BCF header." );
367  }
368  return res;
369 }
370 
372 {
373  // We here take a shortcut to avoid the above string copy in has_filter().
374  // However, we still have to explicitly create the char array, as the htslib function
375  // expects a non-const char*, which does not work by simply passing "PASS" as a string literal.
376  // This also covers '.' (no filter applied), as according to the htslib doc and code,
377  // "PASS" and "." can be used interchangeably.
378  char pass[] = "PASS";
379  return ::bcf_has_filter( header_->data(), record_, pass );
380 }
381 
382 // =================================================================================================
383 // Info Column
384 // =================================================================================================
385 
386 std::vector<std::string> VcfRecord::get_info_ids() const
387 {
388  ::bcf_unpack( record_, BCF_UN_INFO );
389  auto ret = std::vector<std::string>( record_->n_info );
390  for( size_t i = 0; i < static_cast<size_t>( record_->n_info ); ++i ) {
391  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.info[i].key ));
392  }
393  return ret;
394 }
395 
396 bool VcfRecord::has_info( std::string const& id ) const
397 {
398  return has_info( id.c_str() );
399 }
400 
401 bool VcfRecord::has_info( char const* id ) const
402 {
403  return ::bcf_get_info( header_->data(), record_, id ) != nullptr;
404 
405  // The below code seems to return whether the field exists at all in the header... not what we want.
406  // int const id = bcf_hdr_id2int( header_->data(), BCF_DT_ID, id.c_str() );
407  // return bcf_hdr_idinfo_exists( header_->data(), BCF_HL_INFO, id );
408 }
409 
410 void VcfRecord::assert_info( std::string const& id ) const
411 {
412  assert_info( id.c_str() );
413 }
414 
415 void VcfRecord::assert_info( char const* id ) const
416 {
417  if( ! ::bcf_get_info( header_->data(), record_, id )) {
418  throw std::runtime_error(
419  "Required INFO tag " + std::string( id ) + " is not present in the record at " + at()
420  );
421  }
422 }
423 
424 std::string VcfRecord::get_info_string( std::string const& id ) const
425 {
426  std::string result;
427  get_info_string( id, result );
428  return result;
429 }
430 
431 void VcfRecord::get_info_string( std::string const& id, std::string& destination ) const
432 {
433  // Load the result into our buffer that we use to avoid reallocating memory all the time.
434  auto const len = get_info_ptr_(
435  id, BCF_HT_STR, reinterpret_cast<void**>( &info_dest_string_ ), &info_ndest_string_
436  );
437  assert( len >= 0 );
438  assert( info_ndest_string_ >= 0 );
439  assert( info_ndest_string_ >= len );
440 
441  // Copy into our destination string.
442  // destination.clear();
443  // destination.reserve( info_ndest_string_ );
444  // for( int i = 0; i < info_ndest_string_; ++i ) {
445  // if( destination.size() ) {
446  // destination += "***";
447  // }
448  // destination.append( std::string( static_cast<char*>( info_dest_string_ )));
449  // }
450 
451  destination.assign( static_cast<char*>( info_dest_string_ ), len );
452 }
453 
454 std::vector<double> VcfRecord::get_info_float( std::string const& id ) const
455 {
456  std::vector<double> result;
457  get_info_float( id, result );
458  return result;
459 }
460 
461 void VcfRecord::get_info_float( std::string const& id, std::vector<double>& destination ) const
462 {
463  // Load the result into our buffer that we use to avoid reallocating memory all the time.
464  auto const len = get_info_ptr_(
465  id, BCF_HT_REAL, reinterpret_cast<void**>( &info_dest_float_ ), &info_ndest_float_
466  );
467  assert( len >= 0 );
468  assert( info_ndest_float_ >= 0 );
469  assert( info_ndest_float_ >= len );
470 
471  // Copy over into vector.
472  destination.resize( len );
473  for( int i = 0; i < len; ++i ) {
474  destination[i] = static_cast<double>( static_cast<float*>(info_dest_float_)[i] );
475  }
476 }
477 
478 std::vector<int32_t> VcfRecord::get_info_int( std::string const& id ) const
479 {
480  std::vector<int32_t> result;
481  get_info_int( id, result );
482  return result;
483 }
484 
485 void VcfRecord::get_info_int( std::string const& id, std::vector<int32_t>& destination ) const
486 {
487  // Load the result into our buffer that we use to avoid reallocating memory all the time.
488  auto const len = get_info_ptr_(
489  id, BCF_HT_INT, reinterpret_cast<void**>( &info_dest_int_ ), &info_ndest_int_
490  );
491  assert( len >= 0 );
492  assert( info_ndest_int_ >= 0 );
493  assert( info_ndest_int_ >= len );
494 
495  // Copy over into vector.
496  destination.resize( len );
497  for( int i = 0; i < len; ++i ) {
498  destination[i] = static_cast<int32_t*>(info_dest_int_)[i];
499  }
500 }
501 
502 bool VcfRecord::get_info_flag( std::string const& id ) const
503 {
504  // For flags, the destination pointer is not used, and instead the value is immediately returned.
505  return get_info_ptr_( id, BCF_HT_FLAG, nullptr, nullptr );
506 }
507 
508 // =================================================================================================
509 // Format Column
510 // =================================================================================================
511 
512 std::vector<std::string> VcfRecord::get_format_ids() const
513 {
514  ::bcf_unpack( record_, BCF_UN_FMT );
515  auto ret = std::vector<std::string>( record_->n_fmt );
516  for( size_t i = 0; i < static_cast<size_t>( record_->n_fmt ); ++i ) {
517  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.fmt[i].id ));
518  }
519  return ret;
520 }
521 
522 bool VcfRecord::has_format( std::string const& id ) const
523 {
524  return has_format( id.c_str() );
525 
526 }
527 
528 bool VcfRecord::has_format( char const* id ) const
529 {
530  return ::bcf_get_fmt( header_->data(), record_, id ) != nullptr;
531 
532 }
533 
534 void VcfRecord::assert_format( std::string const& id ) const
535 {
536  assert_format( id.c_str() );
537 }
538 
539 void VcfRecord::assert_format( char const* id ) const
540 {
541  if( ! ::bcf_get_fmt( header_->data(), record_, id )) {
542  throw std::runtime_error(
543  "Required FORMAT tag " + std::string( id ) + " is not present in the record at " + at()
544  );
545  }
546 }
547 
548 // =================================================================================================
549 // Sample Columns
550 // =================================================================================================
551 
553 {
554  return VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger );
555 }
556 
558 {
559  return VcfFormatIteratorGenotype();
560 }
561 
563  return {
564  VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger ),
566  };
567 }
568 
570 {
571  return VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString );
572 }
573 
575 {
576  return VcfFormatIteratorString();
577 }
578 
580  std::string const& id
581 ) const {
582  return {
583  VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString ),
585  };
586 }
587 
588 VcfFormatIteratorInt VcfRecord::begin_format_int( std::string const& id ) const
589 {
590  return VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger );
591 }
592 
594 {
595  return VcfFormatIteratorInt();
596 }
597 
599  std::string const& id
600 ) const {
601  return {
602  VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger ),
604  };
605 }
606 
608 {
609  return VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat );
610 }
611 
613 {
614  return VcfFormatIteratorFloat();
615 }
616 
618  std::string const& id
619 ) const {
620  return {
621  VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat ),
623  };
624 }
625 
626 // =================================================================================================
627 // Modifiers
628 // =================================================================================================
629 
631 {
632  bool const good = ( ::bcf_read1( source.data(), header_->data(), record_ ) == 0 );
633  // if( good ) {
634  // ::bcf_unpack( record_ , BCF_UN_ALL );
635  // }
636  return good;
637 }
638 
639 // =================================================================================================
640 // Internal Members
641 // =================================================================================================
642 
643 int VcfRecord::get_info_ptr_( std::string const& id, int ht_type, void** dest, int* ndest) const
644 {
645  // Call the htslib function, and call our function to check the return value, which encodes
646  // for errors as well (if negative). If there was an error, that function call throws
647  // an exception.
648  int const len = ::bcf_get_info_values( header_->data(), record_, id.c_str(), dest, ndest, ht_type );
649  VcfHeader::check_value_return_code_( header_->data(), id, ht_type, BCF_HL_INFO, len );
650 
651  // Assert that if ndest is used (for all but flags), it has a valid value.
652  assert( !ndest || ( *ndest >= 0 && *ndest >= len ));
653  return len;
654 }
655 
656 } // namespace population
657 } // namespace genesis
658 
659 #endif // htslib guard
genesis::placement::swap
void swap(Sample &lhs, Sample &rhs)
Definition: sample.cpp:104
genesis::population::VcfRecord::is_snp
bool is_snp() const
Return whether this variant is a SNP.
Definition: vcf_record.cpp:304
genesis::population::VcfRecord::at
std::string at() const
Return a textual representation of the current record chromosome position.
Definition: vcf_record.cpp:196
genesis::population::VcfRecord::begin_format_float
VcfFormatIteratorFloat begin_format_float(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:607
genesis::population::VcfRecord::get_variant_type
VariantType get_variant_type(size_t alt_index) const
Get the variant type of a particular alternative allele/sequence.
Definition: vcf_record.cpp:287
genesis::population::VcfRecord::header
VcfHeader & header()
Return the VcfHeader instance associated with this record.
Definition: vcf_record.hpp:213
genesis::population::VcfRecord::get_format_ids
std::vector< std::string > get_format_ids() const
Get the list of all format IDs (FORMAT column) that the record contains.
Definition: vcf_record.cpp:512
genesis::population::VcfRecord::assert_info
void assert_info(std::string const &id) const
Assert that an INFO entry with a given id is present in the record.
Definition: vcf_record.cpp:410
genesis::population::VcfRecord::operator=
VcfRecord & operator=(VcfRecord const &)=delete
genesis::population::VcfRecord::VariantType::kSnp
@ kSnp
genesis::population::VcfValueType::kString
@ kString
genesis::population::VcfRecord::begin_format_genotype
VcfFormatIteratorGenotype begin_format_genotype() const
Get the begin iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set ...
Definition: vcf_record.cpp:552
genesis::population::VcfRecord::get_info_string
std::string get_info_string(std::string const &id) const
Return the info value for the given key id as a string.
Definition: vcf_record.cpp:424
genesis::population::VcfValueType::kFloat
@ kFloat
genesis::population::VcfRecord::assert_format
void assert_format(std::string const &id) const
Assert that a FORMAT entry with a given id is present in the record.
Definition: vcf_record.cpp:534
genesis::population::VcfRecord::get_info_flag
bool get_info_flag(std::string const &id) const
Return whether an INFO flag is set, that is, whether the info value for a given key id is present in ...
Definition: vcf_record.cpp:502
genesis::population::VcfRecord::VariantType::kBreakend
@ kBreakend
genesis::population::VcfRecord::get_id
std::string get_id() const
Get the ID string of the variant (ID, third column of the line).
Definition: vcf_record.cpp:190
genesis::population::VcfRecord::begin_format_int
VcfFormatIteratorInt begin_format_int(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:588
genesis::population::VcfRecord::end_format_string
VcfFormatIteratorString end_format_string() const
Get the end iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:574
genesis::population::VcfRecord::get_alternative
std::string get_alternative(size_t index) const
Get a particular alternative allele (ALT, fifth column of the line).
Definition: vcf_record.cpp:224
genesis::population::VcfRecord::get_alternatives
std::vector< std::string > get_alternatives() const
Get the alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:212
genesis::population::VcfRecord::end_format_int
VcfFormatIteratorInt end_format_int() const
Get the end iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:593
vcf_header.hpp
genesis::population::VcfFormatIteratorString
VcfFormatIterator< char *, std::string > VcfFormatIteratorString
Definition: vcf_format_iterator.hpp:65
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::population::VcfRecord::pass_filter
bool pass_filter() const
Return whether the record passes the filters, that is, whether PASS is set, or no filters were applie...
Definition: vcf_record.cpp:371
genesis::population::VcfRecord::get_variants
std::vector< std::string > get_variants() const
Shortcut to get both the reference (REF, fourth column of the line) and the alternative (ALT,...
Definition: vcf_record.cpp:248
genesis::population::VcfFormatIteratorGenotype
VcfFormatIterator< int32_t, VcfGenotype > VcfFormatIteratorGenotype
Definition: vcf_format_iterator.hpp:68
genesis::population::VcfFormatIterator
Iterate the FORMAT information for the samples in a SNP/variant line in a VCF/BCF file.
Definition: vcf_format_iterator.hpp:62
genesis::population::VcfRecord::swap
void swap(VcfRecord &other)
Definition: vcf_record.cpp:149
genesis::population::VcfRecord::get_info_ids
std::vector< std::string > get_info_ids() const
Get the list of all info IDs (INFO column) that the record contains.
Definition: vcf_record.cpp:386
genesis::population::VcfRecord::read_next
bool read_next(HtsFile &source)
Read the next record/line from the given source, and replace the content of this VcfRecord instance.
Definition: vcf_record.cpp:630
genesis::population::VcfRecord::VariantType::kOther
@ kOther
genesis::population::HtsFile::data
::htsFile * data()
Definition: hts_file.hpp:97
genesis::population::VcfRecord::begin_format_string
VcfFormatIteratorString begin_format_string(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:569
genesis::population::VcfRecord::get_variant_types
VariantType get_variant_types() const
Get the or'ed (union) value of all variant types of the alternative alleles/sequences of the record.
Definition: vcf_record.cpp:282
genesis::population::VcfRecord::VcfRecord
VcfRecord()
Create a default (empty) instance.
Definition: vcf_record.cpp:97
hts_file.hpp
genesis::population::VcfValueType::kInteger
@ kInteger
genesis::population::VcfRecord::get_chromosome
std::string get_chromosome() const
Get the name of a chromosome/contig/sequence (CHROM, first column of the line).
Definition: vcf_record.cpp:170
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis::population::VcfRecord::get_format_genotype
genesis::utils::Range< VcfFormatIteratorGenotype > get_format_genotype() const
Get an iterator pair over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:562
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VcfRecord::get_reference
std::string get_reference() const
Get the reference allele/sequence of the variant (REF, fourth column of the line).
Definition: vcf_record.cpp:202
genesis::population::VcfRecord::has_format
bool has_format(std::string const &id) const
Return whether the record has a given FORMAT id present.
Definition: vcf_record.cpp:522
genesis::population::VcfRecord::get_format_string
genesis::utils::Range< VcfFormatIteratorString > get_format_string(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:579
genesis::population::VcfRecord::VariantType::kMnp
@ kMnp
genesis::population::VcfRecord::has_info
bool has_info(std::string const &id) const
Return whether the record has a given INFO id present.
Definition: vcf_record.cpp:396
genesis::population::VcfRecord::get_format_float
genesis::utils::Range< VcfFormatIteratorFloat > get_format_float(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:617
genesis::population::VcfRecord::get_position
size_t get_position() const
Get the position within the chromosome/contig (POS, second column of the line).
Definition: vcf_record.cpp:181
genesis::population::VcfRecord::end_format_genotype
VcfFormatIteratorGenotype end_format_genotype() const
Get the end iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:557
genesis::population::VcfHeader::data
::bcf_hdr_t * data()
Return the internal htslib ::bcf_hdr_t data struct pointer.
Definition: vcf_header.hpp:177
genesis::population::VcfFormatIteratorInt
VcfFormatIterator< int32_t, int32_t > VcfFormatIteratorInt
Definition: vcf_format_iterator.hpp:66
genesis::population::VcfFormatIteratorFloat
VcfFormatIterator< float, double > VcfFormatIteratorFloat
Definition: vcf_format_iterator.hpp:67
genesis::population::VcfRecord::get_variant_count
size_t get_variant_count() const
Get the total number of variants (REF and ALT alleles) in the record/line.
Definition: vcf_record.cpp:275
genesis::population::VcfRecord::get_filter_ids
std::vector< std::string > get_filter_ids() const
Get the list of all filter values (PASS or the names of the non-passing filters) that are applied to ...
Definition: vcf_record.cpp:344
genesis::population::VcfRecord::is_snp_or_alt_del
bool is_snp_or_alt_del() const
Return whether this variant is a SNP, or a deletion in the alternative.
Definition: vcf_record.cpp:309
genesis::population::VcfRecord
Capture the information of a single SNP/variant line in a VCF/BCF file.
Definition: vcf_record.hpp:107
genesis::population::VcfRecord::get_info_int
std::vector< int32_t > get_info_int(std::string const &id) const
Return the info value for the given key id as a vector of int.
Definition: vcf_record.cpp:478
genesis::population::VcfRecord::get_alternatives_count
size_t get_alternatives_count() const
Get the number of alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:239
genesis::population::VcfRecord::end_format_float
VcfFormatIteratorFloat end_format_float() const
Get the end iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:612
genesis::population::VcfRecord::VariantType
VariantType
Types of variants of alleles that can occur in a record.
Definition: vcf_record.hpp:121
genesis::population::VcfRecord::unpack
void unpack() const
Unpack the htslib bcf1_t record data.
Definition: vcf_record.cpp:165
vcf_record.hpp
genesis::population::VcfRecord::get_format_int
genesis::utils::Range< VcfFormatIteratorInt > get_format_int(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:598
genesis::population::VcfRecord::get_info_float
std::vector< double > get_info_float(std::string const &id) const
Return the info value for the given key id as a vector of float/double.
Definition: vcf_record.cpp:454
genesis::population::VcfRecord::VariantType::kIndel
@ kIndel
genesis::population::VcfRecord::get_variant
std::string get_variant(size_t index) const
Get a particular variant (REF or ALT allele).
Definition: vcf_record.cpp:260
genesis::population::VcfHeader
Capture the information from a header of a VCF/BCF file.
Definition: vcf_header.hpp:102
genesis::population::HtsFile
Wrap an ::htsFile struct.
Definition: hts_file.hpp:56
genesis::population::VcfRecord::VariantType::kRef
@ kRef
genesis::population::VcfRecord::VariantType::kOverlap
@ kOverlap
genesis::population::VcfRecord::get_quality
double get_quality() const
Get the quality score (QUAL, sixth column of the line).
Definition: vcf_record.cpp:335
genesis::population::VcfRecord::~VcfRecord
~VcfRecord()
Definition: vcf_record.cpp:123
genesis::population::VcfRecord::has_filter
bool has_filter(std::string const &filter) const
Return whether the record has a given filter set.
Definition: vcf_record.cpp:354