39 #include <htslib/hts.h>
40 #include <htslib/vcf.h>
50 namespace population {
59 "Definitions of VCF_REF in htslib and of VariantType::kRef in genesis differ. "
60 "Please submit a bug report at https://github.com/lczech/genesis/issues"
64 "Definitions of VCF_SNP in htslib and of VariantType::kSnp in genesis differ. "
65 "Please submit a bug report at https://github.com/lczech/genesis/issues"
69 "Definitions of VCF_MNP in htslib and of VariantType::kMnp in genesis differ. "
70 "Please submit a bug report at https://github.com/lczech/genesis/issues"
74 "Definitions of VCF_INDEL in htslib and of VariantType::kIndel in genesis differ. "
75 "Please submit a bug report at https://github.com/lczech/genesis/issues"
79 "Definitions of VCF_OTHER in htslib and of VariantType::kOther in genesis differ. "
80 "Please submit a bug report at https://github.com/lczech/genesis/issues"
84 "Definitions of VCF_BND in htslib and of VariantType::kBreakend in genesis differ. "
85 "Please submit a bug report at https://github.com/lczech/genesis/issues"
89 "Definitions of VCF_OVERLAP in htslib and of VariantType::kOverlap in genesis differ. "
90 "Please submit a bug report at https://github.com/lczech/genesis/issues"
99 record_ = ::bcf_init();
101 throw std::runtime_error(
"Failed to default-initialize VcfRecord bcf1_t data structure." );
108 record_ = ::bcf_init();
110 throw std::runtime_error(
"Failed to initialize VcfRecord bcf1_t data structure." );
117 record_ = ::bcf_dup( bcf1 );
119 throw std::runtime_error(
"Failed to copy-initialize VcfRecord bcf1_t data structure." );
126 ::bcf_destroy( record_ );
128 free( info_dest_string_ );
129 free( info_dest_float_ );
130 free( info_dest_int_ );
142 if(
this == &other ) {
153 std::swap( info_dest_string_, other.info_dest_string_ );
154 std::swap( info_dest_float_, other.info_dest_float_ );
155 std::swap( info_dest_int_, other.info_dest_int_ );
156 std::swap( info_ndest_string_, other.info_ndest_string_ );
157 std::swap( info_ndest_float_, other.info_ndest_float_ );
158 std::swap( info_ndest_int_, other.info_ndest_int_ );
167 ::bcf_unpack( record_, BCF_UN_STR );
172 std::string chr = ::bcf_hdr_id2name( header_->
data(), record_->rid );
174 throw std::runtime_error(
175 "Malformed VCF file: empty chromosome name"
186 assert( record_->pos >= 0 );
187 return record_->pos + 1;
192 ::bcf_unpack( record_, BCF_UN_STR );
193 return std::string( record_->d.id );
198 auto const pos_id = std::string(
get_id() !=
"." ?
" (" +
get_id() +
")" :
"" );
206 ::bcf_unpack( record_, BCF_UN_STR );
207 assert( record_->n_allele > 0 );
208 assert( std::strlen(record_->d.allele[0]) ==
static_cast<size_t>( record_->rlen ));
209 return std::string( record_->d.allele[0] );
215 ::bcf_unpack( record_, BCF_UN_STR );
216 assert( record_->n_allele > 0 );
217 auto ret = std::vector<std::string>( record_->n_allele - 1 );
218 for(
size_t i = 1; i < record_->n_allele; ++i ) {
219 ret[ i - 1 ] = std::string( record_->d.allele[i] );
227 ::bcf_unpack( record_, BCF_UN_STR );
228 assert( record_->n_allele > 0 );
229 if( index + 1 >= record_->n_allele ) {
230 throw std::invalid_argument(
231 "Cannot retrieve alternative at index " +
std::to_string(index) +
", as the record " +
232 "line only has " +
std::to_string( record_->n_allele - 1 ) +
" alternative alleles."
235 assert( index + 1 < record_->n_allele );
236 return record_->d.allele[ index + 1 ];
243 ::bcf_unpack( record_, BCF_UN_STR );
244 assert( record_->n_allele > 0 );
245 return record_->n_allele - 1;
252 ::bcf_unpack( record_, BCF_UN_STR );
253 auto ret = std::vector<std::string>( record_->n_allele );
254 for(
size_t i = 0; i < record_->n_allele; ++i ) {
255 ret[i] = std::string( record_->d.allele[i] );
262 ::bcf_unpack( record_, BCF_UN_STR );
263 assert( record_->n_allele > 0 );
264 if( index >= record_->n_allele ) {
265 throw std::invalid_argument(
266 "Cannot retrieve variant at index " +
std::to_string(index) +
", as the record " +
267 "line only has " +
std::to_string( record_->n_allele ) +
" variants (reference + " +
268 "alternative alleles)."
271 assert( index < record_->n_allele );
272 return record_->d.allele[ index ];
277 ::bcf_unpack( record_, BCF_UN_STR );
278 assert( record_->n_allele > 0 );
279 return record_->n_allele;
284 return static_cast<VariantType>( ::bcf_get_variant_types( record_ ));
294 if( alt_index >= record_->n_allele ) {
295 throw std::runtime_error(
297 " out of bounds of the number of alleles " +
std::to_string( record_->n_allele ) +
301 return static_cast<VariantType>( ::bcf_get_variant_type( record_,
static_cast<int>( alt_index )));
306 return ::bcf_is_snp( record_ );
316 bcf_unpack( record_, BCF_UN_STR );
317 for(
size_t i = 0; i < record_->n_allele; ++i) {
322 if( record_->d.allele[i][1] != 0 ) {
325 if( record_->d.allele[i][0] ==
'.' ) {
328 if( record_->d.allele[i][0] ==
'*' && i == 0 ) {
337 return record_->qual;
346 ::bcf_unpack( record_, BCF_UN_FLT );
347 auto ret = std::vector<std::string>();
348 for(
size_t i = 0; i < static_cast<size_t>( record_->d.n_flt ); ++i ) {
349 ret.push_back( std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.flt[i] )));
357 char* cstr =
new char[ filter.length() + 1] ;
358 std::strcpy( cstr, filter.c_str() );
361 int const res = ::bcf_has_filter( header_->
data(), record_, cstr );
366 throw std::runtime_error(
"Filter '" + filter +
"' not defined in VCF/BCF header." );
378 char pass[] =
"PASS";
379 return ::bcf_has_filter( header_->
data(), record_, pass );
388 ::bcf_unpack( record_, BCF_UN_INFO );
389 auto ret = std::vector<std::string>( record_->n_info );
390 for(
size_t i = 0; i < static_cast<size_t>( record_->n_info ); ++i ) {
391 ret[i] = std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.info[i].key ));
403 return ::bcf_get_info( header_->
data(), record_,
id ) !=
nullptr;
417 if( ! ::bcf_get_info( header_->
data(), record_,
id )) {
418 throw std::runtime_error(
419 "Required INFO tag " + std::string(
id ) +
" is not present in the record at " +
at()
434 auto const len = get_info_ptr_(
435 id, BCF_HT_STR,
reinterpret_cast<void**
>( &info_dest_string_ ), &info_ndest_string_
438 assert( info_ndest_string_ >= 0 );
439 assert( info_ndest_string_ >= len );
451 destination.assign(
static_cast<char*
>( info_dest_string_ ), len );
456 std::vector<double> result;
464 auto const len = get_info_ptr_(
465 id, BCF_HT_REAL,
reinterpret_cast<void**
>( &info_dest_float_ ), &info_ndest_float_
468 assert( info_ndest_float_ >= 0 );
469 assert( info_ndest_float_ >= len );
472 destination.resize( len );
473 for(
int i = 0; i < len; ++i ) {
474 destination[i] =
static_cast<double>(
static_cast<float*
>(info_dest_float_)[i] );
480 std::vector<int32_t> result;
488 auto const len = get_info_ptr_(
489 id, BCF_HT_INT,
reinterpret_cast<void**
>( &info_dest_int_ ), &info_ndest_int_
492 assert( info_ndest_int_ >= 0 );
493 assert( info_ndest_int_ >= len );
496 destination.resize( len );
497 for(
int i = 0; i < len; ++i ) {
498 destination[i] =
static_cast<int32_t*
>(info_dest_int_)[i];
505 return get_info_ptr_(
id, BCF_HT_FLAG,
nullptr,
nullptr );
514 ::bcf_unpack( record_, BCF_UN_FMT );
515 auto ret = std::vector<std::string>( record_->n_fmt );
516 for(
size_t i = 0; i < static_cast<size_t>( record_->n_fmt ); ++i ) {
517 ret[i] = std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.fmt[i].id ));
530 return ::bcf_get_fmt( header_->
data(), record_,
id ) !=
nullptr;
541 if( ! ::bcf_get_fmt( header_->
data(), record_,
id )) {
542 throw std::runtime_error(
543 "Required FORMAT tag " + std::string(
id ) +
" is not present in the record at " +
at()
580 std::string
const&
id
599 std::string
const&
id
618 std::string
const&
id
632 bool const good = ( ::bcf_read1( source.
data(), header_->
data(), record_ ) == 0 );
643 int VcfRecord::get_info_ptr_( std::string
const&
id,
int ht_type,
void** dest,
int* ndest)
const
648 int const len = ::bcf_get_info_values( header_->
data(), record_,
id.c_str(), dest, ndest, ht_type );
649 VcfHeader::check_value_return_code_( header_->
data(),
id, ht_type, BCF_HL_INFO, len );
652 assert( !ndest || ( *ndest >= 0 && *ndest >= len ));
659 #endif // htslib guard