38 #include <htslib/hts.h>
39 #include <htslib/vcf.h>
48 namespace population {
74 header_ = ::bcf_hdr_init( mode.c_str() );
76 throw std::runtime_error(
"Failed to initialize VcfHeader bcf_hdr_t data structure." );
83 header_ = ::bcf_hdr_read( hts_file.
data() );
85 throw std::runtime_error(
86 "Failed to initialize VcfHeader bcf_hdr_t data structure for file " +
94 header_ = ::bcf_hdr_dup( bcf_hdr );
96 throw std::runtime_error(
"Failed to copy-initialize VcfHeader bcf_hdr_t data structure." );
103 ::bcf_hdr_destroy( header_ );
120 if(
this == &other ) {
133 return std::string( ::bcf_hdr_get_version( header_ ));
147 const char** seqnames = ::bcf_hdr_seqnames( header_, &nseq );
151 if( nseq > 0 && !seqnames ) {
152 throw std::runtime_error(
153 "Cannot obtain chromosome/contig/sequence names from VCF/BCF header."
158 auto res = std::vector<std::string>( nseq );
159 for(
size_t i = 0; i < static_cast<size_t>(nseq); ++i ) {
160 res[i] = std::string( seqnames[i] );
164 assert( res[i] == std::string( ::bcf_hdr_id2name( header_, i )));
168 if( seqnames !=
nullptr ) {
176 if( chrom_name.empty() ) {
177 throw std::invalid_argument(
"Invalid chromosome name: empty" );
180 auto const id = ::bcf_hdr_name2id( header_, chrom_name.c_str() );
182 throw std::invalid_argument(
"Invalid chromosome name '" + chrom_name +
"'" );
185 size_t const result = header_->id[BCF_DT_CTG][id].val->info[0];
195 return get_hrec_values_( BCF_HL_CTG, chrom_name );
204 return get_hrec_ids_( BCF_HL_FLT );
209 return get_hrec_values_( BCF_HL_FLT,
id );
214 test_hl_entry_(
true, BCF_HL_FLT,
id,
false, {},
false, {},
false, 0 );
219 return test_hl_entry_(
false, BCF_HL_FLT,
id,
false, {},
false, {},
false, 0 );
228 return get_hrec_ids_( BCF_HL_INFO );
233 return get_specification_( BCF_HL_INFO,
id );
238 return get_hrec_values_( BCF_HL_INFO,
id );
243 test_hl_entry_(
true, BCF_HL_INFO,
id,
false, {},
false, {},
false, 0 );
248 test_hl_entry_(
true, BCF_HL_INFO,
id,
true, type,
false, {},
false, 0 );
253 test_hl_entry_(
true, BCF_HL_INFO,
id,
true, type,
true, num,
false, 0 );
263 return test_hl_entry_(
false, BCF_HL_INFO,
id,
false, {},
false, {},
false, 0 );
268 return test_hl_entry_(
false, BCF_HL_INFO,
id,
true, type,
false, {},
false, 0 );
273 return test_hl_entry_(
false, BCF_HL_INFO,
id,
true, type,
true, num,
false, 0 );
287 return get_hrec_ids_( BCF_HL_FMT );
292 return get_specification_( BCF_HL_FMT,
id );
297 return get_hrec_values_( BCF_HL_FMT,
id );
302 test_hl_entry_(
true, BCF_HL_FMT,
id,
false, {},
false, {},
false, 0 );
307 test_hl_entry_(
true, BCF_HL_FMT,
id,
true, type,
false, {},
false, 0 );
312 test_hl_entry_(
true, BCF_HL_FMT,
id,
true, type,
true, num,
false, 0 );
322 return test_hl_entry_(
false, BCF_HL_FMT,
id,
false, {},
false, {},
false, 0 );
327 return test_hl_entry_(
false, BCF_HL_FMT,
id,
true, type,
false, {},
false, 0 );
332 return test_hl_entry_(
false, BCF_HL_FMT,
id,
true, type,
true, num,
false, 0 );
346 assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
347 return header_->n[BCF_DT_SAMPLE];
353 throw std::invalid_argument(
354 "Cannot get sample name for sample at index " +
std::to_string(index) +
358 return header_->samples[index];
363 assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
364 size_t const sample_count = header_->n[BCF_DT_SAMPLE];
365 for(
size_t i = 0; i < sample_count; ++i ) {
366 if( strcmp( name.c_str(), header_->samples[i] ) == 0 ) {
370 throw std::runtime_error(
"Sample name '" + name +
"' not found in VCF file." );
375 assert( bcf_hdr_nsamples(header_) == header_->n[BCF_DT_SAMPLE] );
376 size_t const sample_count = header_->n[BCF_DT_SAMPLE];
377 auto result = std::vector<std::string>( sample_count );
378 for(
size_t i = 0; i < sample_count; ++i ) {
379 result[i] = std::string( header_->samples[i] );
385 std::vector<std::string>
const& sample_names,
386 bool inverse_sample_names
390 if( sample_names.empty() ) {
393 if( inverse_sample_names ) {
396 suc = ::bcf_hdr_set_samples( header_,
"-", 0 );
398 suc = ::bcf_hdr_set_samples( header_,
nullptr, 0 );
403 std::string list = ( inverse_sample_names ?
"^" :
"" );
404 for(
size_t i = 0; i < sample_names.size(); ++i ) {
408 list += sample_names[i];
410 suc = ::bcf_hdr_set_samples( header_, list.c_str(), 0 );
421 throw std::runtime_error(
422 "Invalid list of sample names provided that cannot be used for constricting "
423 "the sample parsing of the VCF/BCF file."
425 }
else if( suc > 0 ) {
430 assert( 0 <= suc &&
static_cast<size_t>(suc) < sample_names.size() );
431 throw std::runtime_error(
432 "Provided list of sample names contains entry '" + sample_names[suc] +
433 "', which is not part of the sample names in the file header, and hence cannot"
434 " be used for constricting the sample parsing of the VCF/BCF file."
443 std::vector<std::string> VcfHeader::get_hrec_ids_(
int hl_type )
const
445 std::vector<std::string> res;
446 for(
int i = 0; i < header_->nhrec; ++i ) {
448 if( header_->hrec[i]->type != hl_type ) {
451 for(
int j = 0; j < header_->hrec[i]->nkeys; ++j ) {
453 if( std::strcmp( header_->hrec[i]->keys[j],
"ID" ) == 0 ) {
454 res.push_back( std::string( header_->hrec[i]->vals[j] ));
461 std::unordered_map<std::string, std::string> VcfHeader::get_hrec_values_(
int hl_type, std::string
const&
id )
const
463 std::unordered_map<std::string, std::string> res;
464 bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type,
"ID",
id.c_str(),
nullptr );
467 throw std::runtime_error(
471 for(
int i = 0; i < hrec->nkeys; ++i ) {
472 res[ std::string( hrec->keys[i] )] = std::string( hrec->vals[i] );
477 VcfSpecification VcfHeader::get_specification_(
int hl_type, std::string
const&
id)
const
479 auto const int_id = ::bcf_hdr_id2int( header_, BCF_DT_ID,
id.c_str() );
480 if( ! bcf_hdr_idinfo_exists( header_, hl_type, int_id )) {
481 throw std::runtime_error(
486 VcfSpecification res;
491 res.type =
static_cast<VcfValueType>( bcf_hdr_id2type( header_, hl_type, int_id ));
492 res.special =
static_cast<VcfValueSpecial>( bcf_hdr_id2length( header_, hl_type, int_id ));
493 res.number = bcf_hdr_id2number( header_, hl_type, int_id );
498 bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type,
"ID",
id.c_str(),
nullptr );
500 auto const descr_key = ::bcf_hrec_find_key( hrec,
"Description" );
501 if( descr_key >= 0 ) {
504 res.description =
utils::trim( std::string( hrec->vals[descr_key] ),
"\"");
509 bool VcfHeader::test_hl_entry_(
511 int hl_type, std::string
const&
id,
514 bool with_number,
size_t number
521 bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header_, hl_type,
"ID",
id.c_str(),
nullptr );
524 throw std::runtime_error(
526 " is not defined in the VCF/BCF header."
532 auto const int_id = ::bcf_hdr_id2int( header_, BCF_DT_ID,
id.c_str() );
533 if( ! bcf_hdr_idinfo_exists( header_, hl_type, int_id )) {
535 throw std::runtime_error(
537 " is not defined in the VCF/BCF header."
547 auto const def_type = bcf_hdr_id2type( header_, hl_type, int_id );
548 if(
static_cast<int>( def_type ) !=
static_cast<int>( type ) ) {
550 throw std::runtime_error(
564 auto const def_special = bcf_hdr_id2length( header_, hl_type, int_id );
565 if( with_special && (
static_cast<int>( def_special ) !=
static_cast<int>( special ))) {
567 throw std::runtime_error(
579 throw std::runtime_error(
583 " is required instead."
589 auto const def_number = bcf_hdr_id2number( header_, hl_type, int_id );
590 assert( with_special );
592 assert( def_special == BCF_VL_FIXED );
593 if( number !=
static_cast<size_t>( def_number )) {
595 throw std::runtime_error(
599 " is required instead."
610 void VcfHeader::check_value_return_code_(
611 ::bcf_hdr_t* header, std::string
const&
id,
int ht_type,
int hl_type,
int return_value
613 assert( hl_type == BCF_HL_INFO || hl_type == BCF_HL_FMT );
614 switch( return_value ) {
616 throw std::runtime_error(
622 bcf_hrec_t* hrec = ::bcf_hdr_get_hrec( header, hl_type,
"ID",
id.c_str(),
nullptr );
623 int const hrec_key = ::bcf_hrec_find_key( hrec,
"Type" );
624 std::string defined_type = ( hrec_key >= 0 ? std::string( hrec->vals[hrec_key] ) :
"Unknown" );
627 throw std::runtime_error(
628 "Clash between types defined in the header and encountered in the VCF/BCF record for " +
635 throw std::runtime_error(
641 throw std::runtime_error(
643 "(e.g., out of memory)."
656 auto const int_id = ::bcf_hdr_id2int( header, BCF_DT_ID,
id.c_str() );
657 assert( bcf_hdr_idinfo_exists( header, hl_type, int_id ) );
658 assert(
id ==
"GT" || bcf_hdr_id2type( header, hl_type, int_id ) ==
static_cast<uint32_t
>( ht_type ));
663 assert( return_value >= 0 );
669 #endif // htslib guard