A library for working with phylogenetic and population genetic data.
v0.27.0
vcf_header.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2021 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 #ifdef GENESIS_HTSLIB
32 
34 
36 
37 extern "C" {
38  #include <htslib/hts.h>
39  #include <htslib/vcf.h>
40 }
41 
42 #include <cassert>
43 #include <cstdlib>
44 #include <cstring>
45 #include <stdexcept>
46 
47 namespace genesis {
48 namespace population {
49 
50 // =================================================================================================
51 // Constructors and Rule of Five
52 // =================================================================================================
53 
54 // VcfHeader::VcfHeader( std::string const& file_name )
55 // {
56 // // We could use our HtsFile class here, but as we only need this access here very locally,
57 // // we directly use the htslib functions instead.
58 //
59 // // Open the file.
60 // auto file_ = ::hts_open( file_name.c_str(), "r" );
61 // if( ! file_ ) {
62 // throw std::runtime_error( "Failed to open VCF/BCF file " + file_name );
63 // }
64 //
65 // // Read header.
66 // header_ = ::bcf_hdr_read( file_ );
67 //
68 // // Close the file again.
69 // ::hts_close( file_ );
70 // }
71 
72 VcfHeader::VcfHeader( std::string const& mode )
73 {
74  header_ = ::bcf_hdr_init( mode.c_str() );
75  if( ! header_ ) {
76  throw std::runtime_error( "Failed to initialize VcfHeader bcf_hdr_t data structure." );
77  }
78 }
79 
81 {
82  // Read header.
83  header_ = ::bcf_hdr_read( hts_file.data() );
84  if( ! header_ ) {
85  throw std::runtime_error(
86  "Failed to initialize VcfHeader bcf_hdr_t data structure for file " +
87  hts_file.file_name()
88  );
89  }
90 }
91 
92 VcfHeader::VcfHeader( ::bcf_hdr_t* bcf_hdr )
93 {
94  header_ = ::bcf_hdr_dup( bcf_hdr );
95  if( ! header_ ) {
96  throw std::runtime_error( "Failed to copy-initialize VcfHeader bcf_hdr_t data structure." );
97  }
98 }
99 
101 {
102  if( header_ ) {
103  ::bcf_hdr_destroy( header_ );
104  }
105 }
106 
108 {
109  // We need a custom move, as the explicitly defaulted one "moves" the header_ pointer, which
110  // effectively copies it, and then upon destruction of the moved-from object frees the header_.
111  // So, instead we swap, so that once `other` gets destroyed (as it is moved from, it will go
112  // out of scope soon), our current data of `this` gets also destroyed with it.
113  std::swap( header_, other.header_ );
114 }
115 
117 {
118  // Same reasoning as above. Need additional check for self assignment,
119  // as otherwise we'd destroy the header as well.
120  if( this == &other ) {
121  return *this;
122  }
123  std::swap( header_, other.header_ );
124  return *this;
125 }
126 
127 // =================================================================================================
128 // General Acccessors
129 // =================================================================================================
130 
131 std::string VcfHeader::version() const
132 {
133  return std::string( ::bcf_hdr_get_version( header_ ));
134 }
135 
136 // =================================================================================================
137 // Chromosomes / Contigs / Sequences
138 // =================================================================================================
139 
140 std::vector<std::string> VcfHeader::get_chromosomes() const
141 {
142  // bcf_hdr_seqnames returns a newly allocated array of pointers to the seq names.
143  // We have to deallocate the array, but not the seqnames themselves; the number
144  // of sequences is stored in the int pointer passed in as the second argument.
145  // The id in each record can be used to index into the array to obtain the sequence name.
146  int nseq = 0;
147  const char** seqnames = ::bcf_hdr_seqnames( header_, &nseq );
148  assert( nseq >= 0 );
149 
150  // If there are supposed to be names, but the array still is empty, we have an error.
151  if( nseq > 0 && !seqnames ) {
152  throw std::runtime_error(
153  "Cannot obtain chromosome/contig/sequence names from VCF/BCF header."
154  );
155  }
156 
157  // Copy over to result.
158  auto res = std::vector<std::string>( nseq );
159  for( size_t i = 0; i < static_cast<size_t>(nseq); ++i ) {
160  res[i] = std::string( seqnames[i] );
161 
162  // bcf_hdr_id2name is another way to get the name of a sequence.
163  // Assert that this is identical.
164  assert( res[i] == std::string( ::bcf_hdr_id2name( header_, i )));
165  }
166 
167  // Clean up and return.
168  if( seqnames != nullptr ) {
169  free(seqnames);
170  }
171  return res;
172 }
173 
174 size_t VcfHeader::get_chromosome_length( std::string const& chrom_name ) const
175 {
176  if( chrom_name.empty() ) {
177  throw std::invalid_argument( "Invalid chromosome name: empty" );
178  }
179 
180  auto const id = ::bcf_hdr_name2id( header_, chrom_name.c_str() );
181  if( id < 0 ) {
182  throw std::invalid_argument( "Invalid chromosome name '" + chrom_name +"'" );
183  }
184 
185  size_t const result = header_->id[BCF_DT_CTG][id].val->info[0];
186  assert(
187  get_chromosome_values( chrom_name ).count("length") == 0 ||
188  std::stoul( get_chromosome_values( chrom_name ).at("length") ) == result
189  );
190  return result;
191 }
192 
193 std::unordered_map<std::string, std::string> VcfHeader::get_chromosome_values( std::string const& chrom_name ) const
194 {
195  return get_hrec_values_( BCF_HL_CTG, chrom_name );
196 }
197 
198 // =================================================================================================
199 // Filter
200 // =================================================================================================
201 
202 std::vector<std::string> VcfHeader::get_filter_ids() const
203 {
204  return get_hrec_ids_( BCF_HL_FLT );
205 }
206 
207 std::unordered_map<std::string, std::string> VcfHeader::get_filter_values( std::string const& id ) const
208 {
209  return get_hrec_values_( BCF_HL_FLT, id );
210 }
211 
212 void VcfHeader::assert_filter( std::string const& id ) const
213 {
214  test_hl_entry_( true, BCF_HL_FLT, id, false, {}, false, {}, false, 0 );
215 }
216 
217 bool VcfHeader::has_filter( std::string const& id ) const
218 {
219  return test_hl_entry_( false, BCF_HL_FLT, id, false, {}, false, {}, false, 0 );
220 }
221 
222 // =================================================================================================
223 // Info
224 // =================================================================================================
225 
226 std::vector<std::string> VcfHeader::get_info_ids() const
227 {
228  return get_hrec_ids_( BCF_HL_INFO );
229 }
230 
232 {
233  return get_specification_( BCF_HL_INFO, id );
234 }
235 
236 std::unordered_map<std::string, std::string> VcfHeader::get_info_values( std::string const& id ) const
237 {
238  return get_hrec_values_( BCF_HL_INFO, id );
239 }
240 
241 void VcfHeader::assert_info( std::string const& id ) const
242 {
243  test_hl_entry_( true, BCF_HL_INFO, id, false, {}, false, {}, false, 0 );
244 }
245 
246 void VcfHeader::assert_info( std::string const& id, VcfValueType type ) const
247 {
248  test_hl_entry_( true, BCF_HL_INFO, id, true, type, false, {}, false, 0 );
249 }
250 
251 void VcfHeader::assert_info( std::string const& id, VcfValueType type, VcfValueSpecial num ) const
252 {
253  test_hl_entry_( true, BCF_HL_INFO, id, true, type, true, num, false, 0 );
254 }
255 
256 void VcfHeader::assert_info( std::string const& id, VcfValueType type, size_t number ) const
257 {
258  test_hl_entry_( true, BCF_HL_INFO, id, true, type, true, VcfValueSpecial::kFixed, true, number );
259 }
260 
261 bool VcfHeader::has_info( std::string const& id ) const
262 {
263  return test_hl_entry_( false, BCF_HL_INFO, id, false, {}, false, {}, false, 0 );
264 }
265 
266 bool VcfHeader::has_info( std::string const& id, VcfValueType type ) const
267 {
268  return test_hl_entry_( false, BCF_HL_INFO, id, true, type, false, {}, false, 0 );
269 }
270 
271 bool VcfHeader::has_info( std::string const& id, VcfValueType type, VcfValueSpecial num ) const
272 {
273  return test_hl_entry_( false, BCF_HL_INFO, id, true, type, true, num, false, 0 );
274 }
275 
276 bool VcfHeader::has_info( std::string const& id, VcfValueType type, size_t number ) const
277 {
278  return test_hl_entry_( false, BCF_HL_INFO, id, true, type, true, VcfValueSpecial::kFixed, true, number );
279 }
280 
281 // =================================================================================================
282 // Format
283 // =================================================================================================
284 
285 std::vector<std::string> VcfHeader::get_format_ids() const
286 {
287  return get_hrec_ids_( BCF_HL_FMT );
288 }
289 
291 {
292  return get_specification_( BCF_HL_FMT, id );
293 }
294 
295 std::unordered_map<std::string, std::string> VcfHeader::get_format_values( std::string const& id ) const
296 {
297  return get_hrec_values_( BCF_HL_FMT, id );
298 }
299 
300 void VcfHeader::assert_format( std::string const& id ) const
301 {
302  test_hl_entry_( true, BCF_HL_FMT, id, false, {}, false, {}, false, 0 );
303 }
304 
305 void VcfHeader::assert_format( std::string const& id, VcfValueType type ) const
306 {
307  test_hl_entry_( true, BCF_HL_FMT, id, true, type, false, {}, false, 0 );
308 }
309 
310 void VcfHeader::assert_format( std::string const& id, VcfValueType type, VcfValueSpecial num ) const
311 {
312  test_hl_entry_( true, BCF_HL_FMT, id, true, type, true, num, false, 0 );
313 }
314 
315 void VcfHeader::assert_format( std::string const& id, VcfValueType type, size_t number ) const
316 {
317  test_hl_entry_( true, BCF_HL_FMT, id, true, type, true, VcfValueSpecial::kFixed, true, number );
318 }
319 
320 bool VcfHeader::has_format( std::string const& id ) const
321 {
322  return test_hl_entry_( false, BCF_HL_FMT, id, false, {}, false, {}, false, 0 );
323 }
324 
325 bool VcfHeader::has_format( std::string const& id, VcfValueType type ) const
326 {
327  return test_hl_entry_( false, BCF_HL_FMT, id, true, type, false, {}, false, 0 );
328 }
329 
330 bool VcfHeader::has_format( std::string const& id, VcfValueType type, VcfValueSpecial num ) const
331 {
332  return test_hl_entry_( false, BCF_HL_FMT, id, true, type, true, num, false, 0 );
333 }
334 
335 bool VcfHeader::has_format( std::string const& id, VcfValueType type, size_t number ) const
336 {
337  return test_hl_entry_( false, BCF_HL_FMT, id, true, type, true, VcfValueSpecial::kFixed, true, number );
338 }
339 
340 // =================================================================================================
341 // Samples
342 // =================================================================================================
343 
345 {
346  assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
347  return header_->n[BCF_DT_SAMPLE];
348 }
349 
350 std::string VcfHeader::get_sample_name( size_t index ) const
351 {
352  if( index >= get_sample_count() ) {
353  throw std::invalid_argument(
354  "Cannot get sample name for sample at index " + std::to_string(index) +
355  ", as the VCF/BCF file only uses " + std::to_string( get_sample_count() ) + " samples."
356  );
357  }
358  return header_->samples[index];
359 }
360 
361 size_t VcfHeader::get_sample_index( std::string const& name ) const
362 {
363  assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
364  size_t const sample_count = header_->n[BCF_DT_SAMPLE];
365  for( size_t i = 0; i < sample_count; ++i ) {
366  if( strcmp( name.c_str(), header_->samples[i] ) == 0 ) {
367  return i;
368  }
369  }
370  throw std::runtime_error( "Sample name '" + name + "' not found in VCF file." );
371 }
372 
373 std::vector<std::string> VcfHeader::get_sample_names() const
374 {
375  assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
376  size_t const sample_count = header_->n[BCF_DT_SAMPLE];
377  auto result = std::vector<std::string>( sample_count );
378  for( size_t i = 0; i < sample_count; ++i ) {
379  result[i] = std::string( header_->samples[i] );
380  }
381  return result;
382 }
383 
385  std::vector<std::string> const& sample_names,
386  bool inverse_sample_names
387 ) {
388  // Restrict the samples that are read for each record, using the provided list of names.
389  int suc = 0;
390  if( sample_names.empty() ) {
391  // If an empty list was supplied, either read none or all samples,
392  // depending on inverse_sample_names.
393  if( inverse_sample_names ) {
394  // Basically, this is identical to not calling the function at all.
395  // But for completeness, we still call it here.
396  suc = ::bcf_hdr_set_samples( header_, "-", 0 );
397  } else {
398  suc = ::bcf_hdr_set_samples( header_, nullptr, 0 );
399  }
400  } else {
401  // If an actual list of sample names is given, we build the required string from it,
402  // then pass that to the htslib function, and check the result for errors.
403  std::string list = ( inverse_sample_names ? "^" : "" );
404  for( size_t i = 0; i < sample_names.size(); ++i ) {
405  if( i > 0 ) {
406  list += ",";
407  }
408  list += sample_names[i];
409  }
410  suc = ::bcf_hdr_set_samples( header_, list.c_str(), 0 );
411  }
412 
413  // Check the return code of the above calls to htslib.
414  // The htslib documentation is not clear on how to interpret the return values of this.
415  // It states (https://github.com/samtools/htslib/blob/develop/htslib/vcf.h):
416  // Returns 0 on success, -1 on error or a positive integer if the list
417  // contains samples not present in the VCF header. In such a case, the
418  // return value is the index of the offending sample.
419  // Is that index 1-based then?! Seriously? We checked the code and it seems it is. WTF.
420  if( suc < 0 ) {
421  throw std::runtime_error(
422  "Invalid list of sample names provided that cannot be used for constricting "
423  "the sample parsing of the VCF/BCF file."
424  );
425  } else if( suc > 0 ) {
426  // Fix to use 0-based index.
427  --suc;
428 
429  // Let's assume that htslib returns valid indices, and assert this.
430  assert( 0 <= suc && static_cast<size_t>(suc) < sample_names.size() );
431  throw std::runtime_error(
432  "Provided list of sample names contains entry '" + sample_names[suc] +
433  "', which is not part of the sample names in the file header, and hence cannot"
434  " be used for constricting the sample parsing of the VCF/BCF file."
435  );
436  }
437 }
438 
439 // =================================================================================================
440 // Internal Helpers
441 // =================================================================================================
442 
443 std::vector<std::string> VcfHeader::get_hrec_ids_( int hl_type ) const
444 {
445  std::vector<std::string> res;
446  for( int i = 0; i < header_->nhrec; ++i ) {
447  // We need to scan all hrec entries, and only process the ones we are interested in...
448  if( header_->hrec[i]->type != hl_type ) {
449  continue;
450  }
451  for( int j = 0; j < header_->hrec[i]->nkeys; ++j ) {
452  // Find the "ID" field, and copy it to our result.
453  if( std::strcmp( header_->hrec[i]->keys[j], "ID" ) == 0 ) {
454  res.push_back( std::string( header_->hrec[i]->vals[j] ));
455  }
456  }
457  }
458  return res;
459 }
460 
461 std::unordered_map<std::string, std::string> VcfHeader::get_hrec_values_( int hl_type, std::string const& id ) const
462 {
463  std::unordered_map<std::string, std::string> res;
464  bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type, "ID", id.c_str(), nullptr );
465 
466  if( !hrec ) {
467  throw std::runtime_error(
468  vcf_hl_type_to_string(hl_type) + " tag " + id + " not defined in the VCF/BCF header."
469  );
470  }
471  for( int i = 0; i < hrec->nkeys; ++i ) {
472  res[ std::string( hrec->keys[i] )] = std::string( hrec->vals[i] );
473  }
474  return res;
475 }
476 
477 VcfSpecification VcfHeader::get_specification_( int hl_type, std::string const& id) const
478 {
479  auto const int_id = ::bcf_hdr_id2int( header_, BCF_DT_ID, id.c_str() );
480  if( ! bcf_hdr_idinfo_exists( header_, hl_type, int_id )) {
481  throw std::runtime_error(
482  vcf_hl_type_to_string(hl_type) + " tag " + id + " not defined in the VCF/BCF header."
483  );
484  }
485 
486  VcfSpecification res;
487  res.id = id;
488 
489  // We use the same values in our Number and Type enums than the htslib-defined macro values,
490  // which is statically asserted. So here, we can simply cast to our enum values.
491  res.type = static_cast<VcfValueType>( bcf_hdr_id2type( header_, hl_type, int_id ));
492  res.special = static_cast<VcfValueSpecial>( bcf_hdr_id2length( header_, hl_type, int_id ));
493  res.number = bcf_hdr_id2number( header_, hl_type, int_id );
494 
495  // Description is a required entry, but there seems to be no macro in htslib for this.
496  // We can (hopefully?!) here simply assert that the hrec exists, because we above already
497  // tested that the int_id exists. Let's hope that htslib is conistent with this.
498  bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type, "ID", id.c_str(), nullptr );
499  assert( hrec );
500  auto const descr_key = ::bcf_hrec_find_key( hrec, "Description" );
501  if( descr_key >= 0 ) {
502  // It seems that htslib leaves the quotes around the description.
503  // That is ugly, let's remove!
504  res.description = utils::trim( std::string( hrec->vals[descr_key] ), "\"");
505  }
506  return res;
507 }
508 
509 bool VcfHeader::test_hl_entry_(
510  bool throwing,
511  int hl_type, std::string const& id,
512  bool with_type, VcfValueType type,
513  bool with_special, VcfValueSpecial special,
514  bool with_number, size_t number
515 ) const {
516  // We always want to test whether the given ID is defined in the header line type (hl_type,
517  // which can be BCF_HL_INFO, BCF_HL_FORMAT, etc).
518  // Let's use two ways of testing this. Because why not. This assertion function is typically
519  // called once per VCF file, so that doesn't cost us much, but gives more certainty that
520  // we are using htslib correctly.
521  bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type, "ID", id.c_str(), nullptr );
522  if( !hrec ) {
523  if( throwing ) {
524  throw std::runtime_error(
525  "Required " + vcf_hl_type_to_string(hl_type) + " tag " + id +
526  " is not defined in the VCF/BCF header."
527  );
528  } else {
529  return false;
530  }
531  }
532  auto const int_id = ::bcf_hdr_id2int( header_, BCF_DT_ID, id.c_str() );
533  if( ! bcf_hdr_idinfo_exists( header_, hl_type, int_id )) {
534  if( throwing ) {
535  throw std::runtime_error(
536  "Required " + vcf_hl_type_to_string(hl_type) + " tag " + id +
537  " is not defined in the VCF/BCF header."
538  );
539  } else {
540  return false;
541  }
542  }
543 
544  // If requested, test that the header line sets the correct data type (Integer, String, etc;
545  // this is one of the many circumstances where the word type is used over and over again in htslib).
546  if( with_type ) {
547  auto const def_type = bcf_hdr_id2type( header_, hl_type, int_id );
548  if( static_cast<int>( def_type ) != static_cast<int>( type ) ) {
549  if( throwing ) {
550  throw std::runtime_error(
551  vcf_hl_type_to_string(hl_type) + " tag " + id + " is defined in the VCF/BCF header "
552  "to be of value data type '" + vcf_value_type_to_string( def_type ) +
553  "', but data type '" + vcf_value_type_to_string( type ) + "' is required instead."
554  );
555  } else {
556  return false;
557  }
558  }
559  }
560 
561  // Same for the number of values. Here, we test both the kind of number values (fixed or something
562  // else), and the actual number, if needed. If an actual number is given, make sure
563  // that the special is set to fixed.
564  auto const def_special = bcf_hdr_id2length( header_, hl_type, int_id );
565  if( with_special && ( static_cast<int>( def_special ) != static_cast<int>( special ))) {
566  if( throwing ) {
567  throw std::runtime_error(
568  vcf_hl_type_to_string(hl_type) + " tag " + id + " is defined in the VCF/BCF header "
569  "to have '" + vcf_value_special_to_string( def_special ) + "' number of values, but '" +
570  vcf_value_special_to_string( special ) + "' is required instead."
571  );
572  } else {
573  return false;
574  }
575  }
576  if( with_number ) {
577  if( special != VcfValueSpecial::kFixed ) {
578  if( throwing ) {
579  throw std::runtime_error(
580  vcf_hl_type_to_string(hl_type) + " tag " + id + " is defined in the VCF/BCF header "
581  "to have '" + vcf_value_special_to_string( def_special ) + "' number of values, but '" +
582  vcf_value_special_to_string( special ) + "' with n=" + std::to_string( number ) +
583  " is required instead."
584  );
585  } else {
586  return false;
587  }
588  }
589  auto const def_number = bcf_hdr_id2number( header_, hl_type, int_id );
590  assert( with_special );
591  assert( special == VcfValueSpecial::kFixed );
592  assert( def_special == BCF_VL_FIXED );
593  if( number != static_cast<size_t>( def_number )) {
594  if( throwing ) {
595  throw std::runtime_error(
596  vcf_hl_type_to_string(hl_type) + " tag " + id + " is defined in the VCF/BCF header "
597  "to have '" + vcf_value_special_to_string( def_special ) + "' number of values with n=" +
598  std::to_string( def_number ) + ", but n=" + std::to_string( number ) +
599  " is required instead."
600  );
601  } else {
602  return false;
603  }
604  }
605  }
606 
607  return true;
608 }
609 
610 void VcfHeader::check_value_return_code_(
611  ::bcf_hdr_t* header, std::string const& id, int ht_type, int hl_type, int return_value
612 ) {
613  assert( hl_type == BCF_HL_INFO || hl_type == BCF_HL_FMT );
614  switch( return_value ) {
615  case -1: {
616  throw std::runtime_error(
617  vcf_hl_type_to_string( hl_type ) + " tag " + id + " not defined in the VCF/BCF header."
618  );
619  break;
620  }
621  case -2: {
622  bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header, hl_type, "ID", id.c_str(), nullptr );
623  int const hrec_key = ::bcf_hrec_find_key( hrec, "Type" );
624  std::string defined_type = ( hrec_key >= 0 ? std::string( hrec->vals[hrec_key] ) : "Unknown" );
625  // int const tag_id = bcf_hdr_id2int( header, BCF_DT_ID, id.c_str() );
626 
627  throw std::runtime_error(
628  "Clash between types defined in the header and encountered in the VCF/BCF record for " +
629  vcf_hl_type_to_string( hl_type ) + " tag " + id + ": Header defines type '" + defined_type +
630  "', but '" + vcf_value_type_to_string( ht_type ) + "' was requested instead."
631  );
632  break;
633  }
634  case -3: {
635  throw std::runtime_error(
636  vcf_hl_type_to_string( hl_type ) + " tag " + id + " not present in the VCF/BCF record."
637  );
638  break;
639  }
640  case -4: {
641  throw std::runtime_error(
642  vcf_hl_type_to_string( hl_type ) + " tag " + id + " retrieval could not be completed " +
643  "(e.g., out of memory)."
644  );
645  break;
646  }
647  // default:
648  // (void);
649  }
650 
651  // If we are here, the above part succeeded, which means, our return type could correctly be
652  // retrieved. Let's assert that this is also the type that was specified in the header,
653  // just to be sure that htslib does its job. Except for the genotype `GT` FORMAT field,
654  // because htslib treats that as a special case that is a string that gets converted to an
655  // int representation...
656  auto const int_id = ::bcf_hdr_id2int( header, BCF_DT_ID, id.c_str() );
657  assert( bcf_hdr_idinfo_exists( header, hl_type, int_id ) );
658  assert( id == "GT" || bcf_hdr_id2type( header, hl_type, int_id ) == static_cast<uint32_t>( ht_type ));
659  (void) int_id;
660 
661  // Assert that we are only left with valid, non-negative return codes.
662  // All negative ones, which signify errors, are caught above.
663  assert( return_value >= 0 );
664 }
665 
666 } // namespace population
667 } // namespace genesis
668 
669 #endif // htslib guard
genesis::placement::swap
void swap(Sample &lhs, Sample &rhs)
Definition: sample.cpp:104
genesis::population::VcfHeader::get_format_ids
std::vector< std::string > get_format_ids() const
Get a list of the ID names of all FORMAT fields in the header.
Definition: vcf_header.cpp:285
genesis::population::VcfHeader::get_format_values
std::unordered_map< std::string, std::string > get_format_values(std::string const &id) const
Get all key-value pairs describing a particular format field, given its ID.
Definition: vcf_header.cpp:295
genesis::population::VcfHeader::get_format_specification
VcfSpecification get_format_specification(std::string const &id) const
Get the required specification key-value-pairs for a given FORMAT entry.
Definition: vcf_header.cpp:290
genesis::population::VcfHeader::get_chromosome_length
size_t get_chromosome_length(std::string const &chrom_name) const
Get the length of a chromosome/contig/sequence, given its name.
Definition: vcf_header.cpp:174
genesis::population::VcfHeader::get_info_ids
std::vector< std::string > get_info_ids() const
Get a list of the ID names of all INFO fields in the header.
Definition: vcf_header.cpp:226
genesis::population::vcf_hl_type_to_string
std::string vcf_hl_type_to_string(int hl_type)
Internal helper function to convert htslib-internal BCF_HL_* header line type values to their string ...
Definition: vcf_common.cpp:205
genesis::utils::trim
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:602
genesis::population::VcfValueSpecial
VcfValueSpecial
Specification for special markers for the number of values expected for key-value-pairs of VCF/BCF fi...
Definition: vcf_common.hpp:105
genesis::population::VcfSpecification
Collect the four required keys that describe an INFO or FORMAT sub-field of VCF/BCF files.
Definition: vcf_common.hpp:150
genesis::population::VcfHeader::get_chromosomes
std::vector< std::string > get_chromosomes() const
Get a list of the chromosome/contig/sequence names used in the file.
Definition: vcf_header.cpp:140
genesis::population::VcfHeader::assert_format
void assert_format(std::string const &id) const
Assert that an FORMAT entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:300
genesis::population::VcfHeader::get_info_values
std::unordered_map< std::string, std::string > get_info_values(std::string const &id) const
Get all key-value pairs describing a particular info header line, given its ID.
Definition: vcf_header.cpp:236
vcf_header.hpp
genesis::population::VcfHeader::get_info_specification
VcfSpecification get_info_specification(std::string const &id) const
Get the required specification key-value-pairs for a given INFO entry.
Definition: vcf_header.cpp:231
genesis::population::VcfHeader::get_sample_index
size_t get_sample_index(std::string const &name) const
Get the index of a sample, given its name.
Definition: vcf_header.cpp:361
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genesis::population::VcfHeader::get_chromosome_values
std::unordered_map< std::string, std::string > get_chromosome_values(std::string const &chrom_name) const
Get all key-value-pairs describing a particular chromosome/contig/sequence, given its name.
Definition: vcf_header.cpp:193
string.hpp
Provides some commonly used string utility functions.
genesis::population::VcfHeader::version
std::string version() const
Return the VCF/BCF version string, e.g. "VCFv4.2".
Definition: vcf_header.cpp:131
genesis::population::VcfHeader::get_filter_values
std::unordered_map< std::string, std::string > get_filter_values(std::string const &id) const
Get all key-value pairs describing a particular filter header line, given its ID.
Definition: vcf_header.cpp:207
genesis::population::VcfHeader::get_sample_names
std::vector< std::string > get_sample_names() const
Return a list of the sample names (column headers) of the VCF/BCF file.
Definition: vcf_header.cpp:373
genesis::population::HtsFile::file_name
std::string const & file_name() const
Definition: hts_file.hpp:92
genesis::population::HtsFile::data
::htsFile * data()
Definition: hts_file.hpp:97
genesis::population::VcfHeader::has_info
bool has_info(std::string const &id) const
Return whether an INFO entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:261
genesis::population::VcfHeader::get_sample_count
size_t get_sample_count() const
Get the number of samples (columns) in the file.
Definition: vcf_header.cpp:344
genesis::population::VcfValueSpecial::kFixed
@ kFixed
Fixed number of values expected. In VCF, this is denoted simply by an integer number.
genesis::population::VcfHeader::~VcfHeader
~VcfHeader()
Definition: vcf_header.cpp:100
genesis::population::VcfHeader::VcfHeader
VcfHeader()=default
Create a default (empty) instance.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::vcf_value_special_to_string
std::string vcf_value_special_to_string(VcfValueSpecial vl_type_num)
Definition: vcf_common.cpp:172
genesis::population::VcfHeader::operator=
VcfHeader & operator=(VcfHeader const &)=delete
genesis::population::VcfHeader::assert_filter
void assert_filter(std::string const &id) const
Assert that a FILTER entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:212
genesis::population::VcfHeader::has_format
bool has_format(std::string const &id) const
Return whether a FORMAT entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:320
genesis::population::VcfValueType
VcfValueType
Specification for the data type of the values expected in key-value-pairs of VCF/BCF files.
Definition: vcf_common.hpp:88
genesis::population::VcfHeader::assert_info
void assert_info(std::string const &id) const
Assert that an INFO entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:241
genesis::population::VcfHeader::has_filter
bool has_filter(std::string const &id) const
Return whether a FILTER entry with a given ID is defined in the header of the VCF/BCF file.
Definition: vcf_header.cpp:217
genesis::population::VcfHeader::get_filter_ids
std::vector< std::string > get_filter_ids() const
Get a list of the ID names of all FILTER entries in the header.
Definition: vcf_header.cpp:202
genesis::population::vcf_value_type_to_string
std::string vcf_value_type_to_string(VcfValueType ht_type)
Definition: vcf_common.cpp:142
genesis::population::VcfHeader
Capture the information from a header of a VCF/BCF file.
Definition: vcf_header.hpp:102
genesis::population::HtsFile
Wrap an ::htsFile struct.
Definition: hts_file.hpp:56
genesis::population::VcfHeader::set_samples
void set_samples(std::vector< std::string > const &sample_names, bool inverse_sample_names=false)
Speficy a subset of samples to be parsed.
Definition: vcf_header.cpp:384
genesis::population::VcfHeader::get_sample_name
std::string get_sample_name(size_t index) const
Get the name of a sample given its index.
Definition: vcf_header.cpp:350