A library for working with phylogenetic data.
v0.25.0
vcf_record.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2021 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 #ifdef GENESIS_HTSLIB
32 
34 
37 
38 extern "C" {
39  #include <htslib/hts.h>
40  #include <htslib/vcf.h>
41 }
42 
43 #include <cassert>
44 #include <cstdint>
45 #include <cstdlib>
46 #include <cstring>
47 #include <stdexcept>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Typedefs and Enums
54 // =================================================================================================
55 
56 // VariantType
57 static_assert(
58  static_cast<int>( VcfRecord::VariantType::kRef ) == VCF_REF,
59  "Definitions of VCF_REF in htslib and of VariantType::kRef in genesis differ. "
60  "Please submit a bug report at https://github.com/lczech/genesis/issues"
61 );
62 static_assert(
63  static_cast<int>( VcfRecord::VariantType::kSnp ) == VCF_SNP,
64  "Definitions of VCF_SNP in htslib and of VariantType::kSnp in genesis differ. "
65  "Please submit a bug report at https://github.com/lczech/genesis/issues"
66 );
67 static_assert(
68  static_cast<int>( VcfRecord::VariantType::kMnp ) == VCF_MNP,
69  "Definitions of VCF_MNP in htslib and of VariantType::kMnp in genesis differ. "
70  "Please submit a bug report at https://github.com/lczech/genesis/issues"
71 );
72 static_assert(
73  static_cast<int>( VcfRecord::VariantType::kIndel ) == VCF_INDEL,
74  "Definitions of VCF_INDEL in htslib and of VariantType::kIndel in genesis differ. "
75  "Please submit a bug report at https://github.com/lczech/genesis/issues"
76 );
77 static_assert(
78  static_cast<int>( VcfRecord::VariantType::kOther ) == VCF_OTHER,
79  "Definitions of VCF_OTHER in htslib and of VariantType::kOther in genesis differ. "
80  "Please submit a bug report at https://github.com/lczech/genesis/issues"
81 );
82 static_assert(
83  static_cast<int>( VcfRecord::VariantType::kBreakend ) == VCF_BND,
84  "Definitions of VCF_BND in htslib and of VariantType::kBreakend in genesis differ. "
85  "Please submit a bug report at https://github.com/lczech/genesis/issues"
86 );
87 static_assert(
88  static_cast<int>( VcfRecord::VariantType::kOverlap ) == VCF_OVERLAP,
89  "Definitions of VCF_OVERLAP in htslib and of VariantType::kOverlap in genesis differ. "
90  "Please submit a bug report at https://github.com/lczech/genesis/issues"
91 );
92 
93 // =================================================================================================
94 // Constructors and Rule of Five
95 // =================================================================================================
96 
98 {
99  record_ = ::bcf_init();
100  if( ! record_ ) {
101  throw std::runtime_error( "Failed to default-initialize VcfRecord bcf1_t data structure." );
102  }
103 }
104 
106 {
107  header_ = &header;
108  record_ = ::bcf_init();
109  if( ! record_ ) {
110  throw std::runtime_error( "Failed to initialize VcfRecord bcf1_t data structure." );
111  }
112 }
113 
114 VcfRecord::VcfRecord( VcfHeader& header, ::bcf1_t* bcf1 )
115 {
116  header_ = &header;
117  record_ = ::bcf_dup( bcf1 );
118  if( ! record_ ) {
119  throw std::runtime_error( "Failed to copy-initialize VcfRecord bcf1_t data structure." );
120  }
121 }
122 
124 {
125  if( record_ ) {
126  ::bcf_destroy( record_ );
127  }
128  free( info_dest_string_ );
129  free( info_dest_float_ );
130  free( info_dest_int_ );
131 }
132 
134 {
135  // We swap everything, so that once `other` gets destroyed, our current data of `this` gets
136  // also destroyed with it.
137  this->swap( other );
138 }
139 
141 {
142  if( this == &other ) {
143  return *this;
144  }
145  this->swap( other );
146  return *this;
147 }
148 
150 {
151  std::swap( header_, other.header_ );
152  std::swap( record_, other.record_ );
153  std::swap( info_dest_string_, other.info_dest_string_ );
154  std::swap( info_dest_float_, other.info_dest_float_ );
155  std::swap( info_dest_int_, other.info_dest_int_ );
156  std::swap( info_ndest_string_, other.info_ndest_string_ );
157  std::swap( info_ndest_float_, other.info_ndest_float_ );
158  std::swap( info_ndest_int_, other.info_ndest_int_ );
159 }
160 
161 // =================================================================================================
162 // Simple Fixed Columns
163 // =================================================================================================
164 
165 void VcfRecord::unpack() const
166 {
167  ::bcf_unpack( record_, BCF_UN_STR );
168 }
169 
170 std::string VcfRecord::get_chromosome() const
171 {
172  return ::bcf_hdr_id2name( header_->data(), record_->rid );
173 }
174 
176 {
177  // This one time, htslib wants to be smart and make the position 0-based. While we appreciate
178 // their effort, in that case, this leads to incongruencies for users who are not aware of this.
179  // Hence, we "fix" this back to the original number as given in the 1-based VCF/BCF file.
180  return record_->pos + 1;
181 }
182 
183 std::string VcfRecord::get_id() const
184 {
185  ::bcf_unpack( record_, BCF_UN_STR );
186  return std::string( record_->d.id );
187 }
188 
189 std::string VcfRecord::at() const
190 {
191  auto const pos_id = std::string( get_id() != "." ? " (" + get_id() + ")" : "" );
192  return get_chromosome() + ":" + std::to_string( get_position() ) + pos_id;
193 }
194 
195 std::string VcfRecord::get_reference() const
196 {
197  // The REF allele is stored as allele[0], and its length is also stored in rlen, in addition to
198  // it being null terminated anyway. Let's all use this, and assert this.
199  ::bcf_unpack( record_, BCF_UN_STR );
200  assert( record_->n_allele > 0 );
201  assert( std::strlen(record_->d.allele[0]) == static_cast<size_t>( record_->rlen ));
202  return std::string( record_->d.allele[0] );
203 }
204 
205 std::vector<std::string> VcfRecord::get_alternatives() const
206 {
207  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
208  ::bcf_unpack( record_, BCF_UN_STR );
209  assert( record_->n_allele > 0 );
210  auto ret = std::vector<std::string>( record_->n_allele - 1 );
211  for( size_t i = 1; i < record_->n_allele; ++i ) {
212  ret[ i - 1 ] = std::string( record_->d.allele[i] );
213  }
214  return ret;
215 }
216 
217 std::string VcfRecord::get_alternative( size_t index ) const
218 {
219  // The ALT alleles are stored in allele[1..n], so we need to re-index into our result vector.
220  ::bcf_unpack( record_, BCF_UN_STR );
221  assert( record_->n_allele > 0 );
222  if( index + 1 >= record_->n_allele ) {
223  throw std::invalid_argument(
224  "Cannot retrieve alternative at index " + std::to_string(index) + ", as the record " +
225  "line only has " + std::to_string( record_->n_allele - 1 ) + " alternative alleles."
226  );
227  }
228  assert( index + 1 < record_->n_allele );
229  return record_->d.allele[ index + 1 ];
230 }
231 
233 {
234  // Even if there are no alternatives (that is, set to "."), there has to be at least the REF
235  // allele, which we assert here, so that the re-indexing is ensured to work.
236  ::bcf_unpack( record_, BCF_UN_STR );
237  assert( record_->n_allele > 0 );
238  return record_->n_allele - 1;
239 }
240 
241 std::vector<std::string> VcfRecord::get_variants() const
242 {
243  // Bit of code duplication here, but this is just short enough to not require
244  // an extra function, at least for now.
245  ::bcf_unpack( record_, BCF_UN_STR );
246  auto ret = std::vector<std::string>( record_->n_allele );
247  for( size_t i = 0; i < record_->n_allele; ++i ) {
248  ret[i] = std::string( record_->d.allele[i] );
249  }
250  return ret;
251 }
252 
253 std::string VcfRecord::get_variant( size_t index ) const
254 {
255  ::bcf_unpack( record_, BCF_UN_STR );
256  assert( record_->n_allele > 0 );
257  if( index >= record_->n_allele ) {
258  throw std::invalid_argument(
259  "Cannot retrieve variant at index " + std::to_string(index) + ", as the record " +
260  "line only has " + std::to_string( record_->n_allele ) + " variants (reference + " +
261  "alternative alleles)."
262  );
263  }
264  assert( index < record_->n_allele );
265  return record_->d.allele[ index ];
266 }
267 
269 {
270  ::bcf_unpack( record_, BCF_UN_STR );
271  assert( record_->n_allele > 0 );
272  return record_->n_allele;
273 }
274 
276 {
277  return static_cast<VariantType>( ::bcf_get_variant_types( record_ ));
278 }
279 
281 {
282  // Nope, `bcf_dec_t.n_var` is NOT the number of variants that this record has. It is the
283  // allocated size, which might be bigger if the `bcf1_t` is re-used between records.
284  // So, we have to use `bcf1_t.n_allele` to get to the number of actual valid entries...
285  // Furthermore, `bcf_dec_t.var_type` is not necessarily one type, but the or'ed value
286  // of all types of the variant alleles. Of course. Well documented, htslib!
287  if( alt_index >= record_->n_allele ) {
288  throw std::runtime_error(
289  "Alternative allele index " + std::to_string( alt_index ) +
290  " out of bounds of the number of alleles " + std::to_string( record_->n_allele ) +
291  " of the record."
292  );
293  }
294  return static_cast<VariantType>( ::bcf_get_variant_type( record_, static_cast<int>( alt_index )));
295 }
296 
297 bool VcfRecord::is_snp() const
298 {
299  return ::bcf_is_snp( record_ );
300 }
301 
303 {
304  return record_->qual;
305 }
306 
307 // =================================================================================================
308 // Filter Column
309 // =================================================================================================
310 
311 std::vector<std::string> VcfRecord::get_filter_ids() const
312 {
313  ::bcf_unpack( record_, BCF_UN_FLT );
314  auto ret = std::vector<std::string>();
315  for( size_t i = 0; i < static_cast<size_t>( record_->d.n_flt ); ++i ) {
316  ret.push_back( std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.flt[i] )));
317  }
318  return ret;
319 }
320 
321 bool VcfRecord::has_filter( std::string const& filter ) const
322 {
323  // htslib expects a non-const pointer, as it potentially mutates the string...
324  char* cstr = new char[ filter.length() + 1] ;
325  std::strcpy( cstr, filter.c_str() );
326 
327  // Make the call.
328  int const res = ::bcf_has_filter( header_->data(), record_, cstr );
329 
330  // Clean up and check result. Free the string before the potential exception!
331  delete [] cstr;
332  if( res == -1 ) {
333  throw std::runtime_error( "Filter '" + filter + "' not defined in VCF/BCF header." );
334  }
335  return res;
336 }
337 
339 {
340  // We here take a shortcut to avoid the above string copy in has_filter().
341  // However, we still have to explicitly create the char array, as the htslib function
342  // expects a non-const char*, which does not work by simply passing "PASS" as a string literal.
343  char pass[] = "PASS";
344  return ::bcf_has_filter( header_->data(), record_, pass );
345 }
346 
347 // =================================================================================================
348 // Info Column
349 // =================================================================================================
350 
351 std::vector<std::string> VcfRecord::get_info_ids() const
352 {
353  ::bcf_unpack( record_, BCF_UN_INFO );
354  auto ret = std::vector<std::string>( record_->n_info );
355  for( size_t i = 0; i < static_cast<size_t>( record_->n_info ); ++i ) {
356  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.info[i].key ));
357  }
358  return ret;
359 }
360 
361 bool VcfRecord::has_info( std::string const& id ) const
362 {
363  return has_info( id.c_str() );
364 }
365 
366 bool VcfRecord::has_info( char const* id ) const
367 {
368  return ::bcf_get_info( header_->data(), record_, id ) != nullptr;
369 
370  // The below code seems to return whether the field exists at all in the header... not what we want.
371  // int const id = bcf_hdr_id2int( header_->data(), BCF_DT_ID, id.c_str() );
372  // return bcf_hdr_idinfo_exists( header_->data(), BCF_HL_INFO, id );
373 }
374 
375 void VcfRecord::assert_info( std::string const& id ) const
376 {
377  assert_info( id.c_str() );
378 }
379 
380 void VcfRecord::assert_info( char const* id ) const
381 {
382  if( ! ::bcf_get_info( header_->data(), record_, id )) {
383  throw std::runtime_error(
384  "Required INFO tag " + std::string( id ) + " is not present in the record at " + at()
385  );
386  }
387 }
388 
389 std::string VcfRecord::get_info_string( std::string const& id ) const
390 {
391  std::string result;
392  get_info_string( id, result );
393  return result;
394 }
395 
396 void VcfRecord::get_info_string( std::string const& id, std::string& destination ) const
397 {
398  // Load the result into our buffer that we use to avoid reallocating memory all the time.
399  auto const len = get_info_ptr_(
400  id, BCF_HT_STR, reinterpret_cast<void**>( &info_dest_string_ ), &info_ndest_string_
401  );
402  assert( len >= 0 );
403  assert( info_ndest_string_ >= 0 );
404  assert( info_ndest_string_ >= len );
405 
406  // Copy into our destination string.
407  // destination.clear();
408  // destination.reserve( info_ndest_string_ );
409  // for( int i = 0; i < info_ndest_string_; ++i ) {
410  // if( destination.size() ) {
411  // destination += "***";
412  // }
413  // destination.append( std::string( static_cast<char*>( info_dest_string_ )));
414  // }
415 
416  destination.assign( static_cast<char*>( info_dest_string_ ), len );
417 }
418 
419 std::vector<double> VcfRecord::get_info_float( std::string const& id ) const
420 {
421  std::vector<double> result;
422  get_info_float( id, result );
423  return result;
424 }
425 
426 void VcfRecord::get_info_float( std::string const& id, std::vector<double>& destination ) const
427 {
428  // Load the result into our buffer that we use to avoid reallocating memory all the time.
429  auto const len = get_info_ptr_(
430  id, BCF_HT_REAL, reinterpret_cast<void**>( &info_dest_float_ ), &info_ndest_float_
431  );
432  assert( len >= 0 );
433  assert( info_ndest_float_ >= 0 );
434  assert( info_ndest_float_ >= len );
435 
436  // Copy over into vector.
437  destination.resize( len );
438  for( int i = 0; i < len; ++i ) {
439  destination[i] = static_cast<double>( static_cast<float*>(info_dest_float_)[i] );
440  }
441 }
442 
443 std::vector<int32_t> VcfRecord::get_info_int( std::string const& id ) const
444 {
445  std::vector<int32_t> result;
446  get_info_int( id, result );
447  return result;
448 }
449 
450 void VcfRecord::get_info_int( std::string const& id, std::vector<int32_t>& destination ) const
451 {
452  // Load the result into our buffer that we use to avoid reallocating memory all the time.
453  auto const len = get_info_ptr_(
454  id, BCF_HT_INT, reinterpret_cast<void**>( &info_dest_int_ ), &info_ndest_int_
455  );
456  assert( len >= 0 );
457  assert( info_ndest_int_ >= 0 );
458  assert( info_ndest_int_ >= len );
459 
460  // Copy over into vector.
461  destination.resize( len );
462  for( int i = 0; i < len; ++i ) {
463  destination[i] = static_cast<int32_t*>(info_dest_int_)[i];
464  }
465 }
466 
467 bool VcfRecord::get_info_flag( std::string const& id ) const
468 {
469  // For flags, the destination pointer is not used, and instead the value is immediately returned.
470  return get_info_ptr_( id, BCF_HT_FLAG, nullptr, nullptr );
471 }
472 
473 // =================================================================================================
474 // Format Column
475 // =================================================================================================
476 
477 std::vector<std::string> VcfRecord::get_format_ids() const
478 {
479  ::bcf_unpack( record_, BCF_UN_FMT );
480  auto ret = std::vector<std::string>( record_->n_fmt );
481  for( size_t i = 0; i < static_cast<size_t>( record_->n_fmt ); ++i ) {
482  ret[i] = std::string( bcf_hdr_int2id( header_->data(), BCF_DT_ID, record_->d.fmt[i].id ));
483  }
484  return ret;
485 }
486 
487 bool VcfRecord::has_format( std::string const& id ) const
488 {
489  return has_format( id.c_str() );
490 
491 }
492 
493 bool VcfRecord::has_format( char const* id ) const
494 {
495  return ::bcf_get_fmt( header_->data(), record_, id ) != nullptr;
496 
497 }
498 
499 void VcfRecord::assert_format( std::string const& id ) const
500 {
501  assert_format( id.c_str() );
502 }
503 
504 void VcfRecord::assert_format( char const* id ) const
505 {
506  if( ! ::bcf_get_fmt( header_->data(), record_, id )) {
507  throw std::runtime_error(
508  "Required FORMAT tag " + std::string( id ) + " is not present in the record at " + at()
509  );
510  }
511 }
512 
513 // =================================================================================================
514 // Sample Columns
515 // =================================================================================================
516 
518 {
519  return VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger );
520 }
521 
523 {
524  return VcfFormatIteratorGenotype();
525 }
526 
528  return {
529  VcfFormatIteratorGenotype( header_->data(), record_, "GT", VcfValueType::kInteger ),
531  };
532 }
533 
535 {
536  return VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString );
537 }
538 
540 {
541  return VcfFormatIteratorString();
542 }
543 
545  std::string const& id
546 ) const {
547  return {
548  VcfFormatIteratorString( header_->data(), record_, id, VcfValueType::kString ),
550  };
551 }
552 
553 VcfFormatIteratorInt VcfRecord::begin_format_int( std::string const& id ) const
554 {
555  return VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger );
556 }
557 
559 {
560  return VcfFormatIteratorInt();
561 }
562 
564  std::string const& id
565 ) const {
566  return {
567  VcfFormatIteratorInt( header_->data(), record_, id, VcfValueType::kInteger ),
569  };
570 }
571 
573 {
574  return VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat );
575 }
576 
578 {
579  return VcfFormatIteratorFloat();
580 }
581 
583  std::string const& id
584 ) const {
585  return {
586  VcfFormatIteratorFloat( header_->data(), record_, id, VcfValueType::kFloat ),
588  };
589 }
590 
591 // =================================================================================================
592 // Modifiers
593 // =================================================================================================
594 
596 {
597  bool const good = ( ::bcf_read1( source.data(), header_->data(), record_ ) == 0 );
598  // if( good ) {
599  // ::bcf_unpack( record_ , BCF_UN_ALL );
600  // }
601  return good;
602 }
603 
604 // =================================================================================================
605 // Internal Members
606 // =================================================================================================
607 
608 int VcfRecord::get_info_ptr_( std::string const& id, int ht_type, void** dest, int* ndest) const
609 {
610  // Call the htslib function, and call our function to check the return value, which encodes
611  // for errors as well (if negative). If there was an error, that function call throws
612  // an exception.
613  int const len = ::bcf_get_info_values( header_->data(), record_, id.c_str(), dest, ndest, ht_type );
614  VcfHeader::check_value_return_code_( header_->data(), id, ht_type, BCF_HL_INFO, len );
615 
616  // Assert that if ndest is used (for all but flags), it has a valid value.
617  assert( !ndest || ( *ndest >= 0 && *ndest >= len ));
618  return len;
619 }
620 
621 } // namespace population
622 } // namespace genesis
623 
624 #endif // htslib guard
genesis::placement::swap
void swap(Sample &lhs, Sample &rhs)
Definition: sample.cpp:104
genesis::population::VcfRecord::is_snp
bool is_snp() const
Return whether this variant is a SNP.
Definition: vcf_record.cpp:297
genesis::population::VcfRecord::at
std::string at() const
Return a textual representation of the current record chromosome position.
Definition: vcf_record.cpp:189
genesis::population::VcfRecord::begin_format_float
VcfFormatIteratorFloat begin_format_float(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:572
genesis::population::VcfRecord::get_variant_type
VariantType get_variant_type(size_t alt_index) const
Get the variant type of a particular alternative allele/sequence.
Definition: vcf_record.cpp:280
genesis::population::VcfRecord::header
VcfHeader & header()
Return the VcfHeader instance associated with this record.
Definition: vcf_record.hpp:213
genesis::population::VcfRecord::get_format_ids
std::vector< std::string > get_format_ids() const
Get the list of all format IDs (FORMAT column) that the record contains.
Definition: vcf_record.cpp:477
genesis::population::VcfRecord::assert_info
void assert_info(std::string const &id) const
Assert that an INFO entry with a given id is present in the record.
Definition: vcf_record.cpp:375
genesis::population::VcfRecord::operator=
VcfRecord & operator=(VcfRecord const &)=delete
genesis::population::VcfRecord::VariantType::kSnp
@ kSnp
genesis::population::VcfValueType::kString
@ kString
genesis::population::VcfRecord::begin_format_genotype
VcfFormatIteratorGenotype begin_format_genotype() const
Get the begin iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set ...
Definition: vcf_record.cpp:517
genesis::population::VcfRecord::get_info_string
std::string get_info_string(std::string const &id) const
Return the info value for the given key id as a string.
Definition: vcf_record.cpp:389
genesis::population::VcfValueType::kFloat
@ kFloat
genesis::population::VcfRecord::assert_format
void assert_format(std::string const &id) const
Assert that a FORMAT entry with a given id is present in the record.
Definition: vcf_record.cpp:499
genesis::population::VcfRecord::get_info_flag
bool get_info_flag(std::string const &id) const
Return whether an INFO flag is set, that is, whether the info value for a given key id is present in ...
Definition: vcf_record.cpp:467
genesis::population::VcfRecord::VariantType::kBreakend
@ kBreakend
genesis::population::VcfRecord::get_id
std::string get_id() const
Get the ID string of the variant (ID, third column of the line).
Definition: vcf_record.cpp:183
genesis::population::VcfRecord::begin_format_int
VcfFormatIteratorInt begin_format_int(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:553
genesis::population::VcfRecord::end_format_string
VcfFormatIteratorString end_format_string() const
Get the end iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:539
genesis::population::VcfRecord::get_alternative
std::string get_alternative(size_t index) const
Get a particular alternative allele (ALT, fifth column of the line).
Definition: vcf_record.cpp:217
genesis::population::VcfRecord::get_alternatives
std::vector< std::string > get_alternatives() const
Get the alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:205
genesis::population::VcfRecord::end_format_int
VcfFormatIteratorInt end_format_int() const
Get the end iterator over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:558
vcf_header.hpp
genesis::population::VcfFormatIteratorString
VcfFormatIterator< char *, std::string > VcfFormatIteratorString
Definition: vcf_format_iterator.hpp:65
genesis::population::VcfRecord::pass_filter
bool pass_filter() const
Return whether the record passes the filters, that is, whether PASS is set.
Definition: vcf_record.cpp:338
genesis::population::VcfRecord::get_variants
std::vector< std::string > get_variants() const
Shortcut to get both the reference (REF, fourth column of the line) and the alternative (ALT,...
Definition: vcf_record.cpp:241
genesis::population::VcfFormatIteratorGenotype
VcfFormatIterator< int32_t, VcfGenotype > VcfFormatIteratorGenotype
Definition: vcf_format_iterator.hpp:68
genesis::population::VcfFormatIterator
Iterate the FORMAT information for the samples in a SNP/variant line in a VCF/BCF file.
Definition: vcf_format_iterator.hpp:62
genesis::population::VcfRecord::swap
void swap(VcfRecord &other)
Definition: vcf_record.cpp:149
genesis::population::VcfRecord::get_info_ids
std::vector< std::string > get_info_ids() const
Get the list of all info IDs (INFO column) that the record contains.
Definition: vcf_record.cpp:351
genesis::population::VcfRecord::read_next
bool read_next(HtsFile &source)
Read the next record/line from the given source, and replace the content of this VcfRecord instance.
Definition: vcf_record.cpp:595
genesis::population::VcfRecord::VariantType::kOther
@ kOther
genesis::population::HtsFile::data
::htsFile * data()
Definition: hts_file.hpp:97
genesis::population::VcfRecord::begin_format_string
VcfFormatIteratorString begin_format_string(std::string const &id) const
Get the begin iterator over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:534
genesis::population::VcfRecord::get_variant_types
VariantType get_variant_types() const
Get the or'ed (union) value of all variant types of the alternative alleles/sequences of the record.
Definition: vcf_record.cpp:275
genesis::population::VcfRecord::VcfRecord
VcfRecord()
Create a default (empty) instance.
Definition: vcf_record.cpp:97
hts_file.hpp
genesis::population::VcfValueType::kInteger
@ kInteger
genesis::population::VcfRecord::get_chromosome
std::string get_chromosome() const
Get the name of a chromosome/contig/sequence (CHROM, first column of the line).
Definition: vcf_record.cpp:170
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis::population::VcfRecord::get_format_genotype
genesis::utils::Range< VcfFormatIteratorGenotype > get_format_genotype() const
Get an iterator pair over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:527
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VcfRecord::get_reference
std::string get_reference() const
Get the reference allele/sequence of the variant (REF, fourth column of the line).
Definition: vcf_record.cpp:195
genesis::population::VcfRecord::has_format
bool has_format(std::string const &id) const
Return whether the record has a given FORMAT id present.
Definition: vcf_record.cpp:487
genesis::population::VcfRecord::get_format_string
genesis::utils::Range< VcfFormatIteratorString > get_format_string(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a string value.
Definition: vcf_record.cpp:544
genesis::population::VcfRecord::VariantType::kMnp
@ kMnp
genesis::population::VcfRecord::has_info
bool has_info(std::string const &id) const
Return whether the record has a given INFO id present.
Definition: vcf_record.cpp:361
genesis::population::VcfRecord::get_format_float
genesis::utils::Range< VcfFormatIteratorFloat > get_format_float(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:582
genesis::population::VcfRecord::get_position
size_t get_position() const
Get the position within the chromosome/contig (POS, second column of the line).
Definition: vcf_record.cpp:175
genesis::population::VcfRecord::end_format_genotype
VcfFormatIteratorGenotype end_format_genotype() const
Get the end iterator over the samples that accesses the FORMAT genotype (GT field/key/id) as a set of...
Definition: vcf_record.cpp:522
genesis::population::VcfHeader::data
::bcf_hdr_t * data()
Return the internal htslib ::bcf_hdr_t data struct pointer.
Definition: vcf_header.hpp:177
genesis::population::VcfFormatIteratorInt
VcfFormatIterator< int32_t, int32_t > VcfFormatIteratorInt
Definition: vcf_format_iterator.hpp:66
genesis::population::VcfFormatIteratorFloat
VcfFormatIterator< float, double > VcfFormatIteratorFloat
Definition: vcf_format_iterator.hpp:67
genesis::population::VcfRecord::get_variant_count
size_t get_variant_count() const
Get the total number of variants (REF and ALT alleles) in the record/line.
Definition: vcf_record.cpp:268
genesis::population::to_string
std::string to_string(GenomeRegion const &region)
Definition: functions/genome_region.cpp:55
genesis::population::VcfRecord::get_filter_ids
std::vector< std::string > get_filter_ids() const
Get the list of all filter values (PASS or the names of the non-passing filters) that are applied to ...
Definition: vcf_record.cpp:311
genesis::population::VcfRecord
Capture the information of a single SNP/variant line in a VCF/BCF file.
Definition: vcf_record.hpp:107
genesis::population::VcfRecord::get_info_int
std::vector< int32_t > get_info_int(std::string const &id) const
Return the info value for the given key id as a vector of int.
Definition: vcf_record.cpp:443
genesis::population::VcfRecord::get_alternatives_count
size_t get_alternatives_count() const
Get the number of alternative alleles/sequences of the variant (ALT, fifth column of the line).
Definition: vcf_record.cpp:232
genesis::population::VcfRecord::end_format_float
VcfFormatIteratorFloat end_format_float() const
Get the end iterator over the samples that accesses a certain FORMAT id as a float value.
Definition: vcf_record.cpp:577
genesis::population::VcfRecord::VariantType
VariantType
Types of variants of alleles that can occur in a record.
Definition: vcf_record.hpp:121
genesis::population::VcfRecord::unpack
void unpack() const
Unpack the htslib bcf1_t record data.
Definition: vcf_record.cpp:165
vcf_record.hpp
genesis::population::VcfRecord::get_format_int
genesis::utils::Range< VcfFormatIteratorInt > get_format_int(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as an int value.
Definition: vcf_record.cpp:563
genesis::population::VcfRecord::get_info_float
std::vector< double > get_info_float(std::string const &id) const
Return the info value for the given key id as a vector of float/double.
Definition: vcf_record.cpp:419
genesis::population::VcfRecord::VariantType::kIndel
@ kIndel
genesis::population::VcfRecord::get_variant
std::string get_variant(size_t index) const
Get a particular variant (REF or ALT allele).
Definition: vcf_record.cpp:253
genesis::population::VcfHeader
Capture the information from a header of a VCF/BCF file.
Definition: vcf_header.hpp:102
genesis::population::HtsFile
Wrap an ::htsFile struct.
Definition: hts_file.hpp:56
genesis::population::VcfRecord::VariantType::kRef
@ kRef
genesis::population::VcfRecord::VariantType::kOverlap
@ kOverlap
genesis::population::VcfRecord::get_quality
double get_quality() const
Get the quality score (QUAL, sixth column of the line).
Definition: vcf_record.cpp:302
genesis::population::VcfRecord::~VcfRecord
~VcfRecord()
Definition: vcf_record.cpp:123
genesis::population::VcfRecord::has_filter
bool has_filter(std::string const &filter) const
Return whether the record has a given filter set.
Definition: vcf_record.cpp:321