39 #include <htslib/hts.h>
40 #include <htslib/vcf.h>
50 namespace population {
59 "Definitions of VCF_REF in htslib and of VariantType::kRef in genesis differ. "
60 "Please submit a bug report at https://github.com/lczech/genesis/issues"
64 "Definitions of VCF_SNP in htslib and of VariantType::kSnp in genesis differ. "
65 "Please submit a bug report at https://github.com/lczech/genesis/issues"
69 "Definitions of VCF_MNP in htslib and of VariantType::kMnp in genesis differ. "
70 "Please submit a bug report at https://github.com/lczech/genesis/issues"
74 "Definitions of VCF_INDEL in htslib and of VariantType::kIndel in genesis differ. "
75 "Please submit a bug report at https://github.com/lczech/genesis/issues"
79 "Definitions of VCF_OTHER in htslib and of VariantType::kOther in genesis differ. "
80 "Please submit a bug report at https://github.com/lczech/genesis/issues"
84 "Definitions of VCF_BND in htslib and of VariantType::kBreakend in genesis differ. "
85 "Please submit a bug report at https://github.com/lczech/genesis/issues"
89 "Definitions of VCF_OVERLAP in htslib and of VariantType::kOverlap in genesis differ. "
90 "Please submit a bug report at https://github.com/lczech/genesis/issues"
99 record_ = ::bcf_init();
101 throw std::runtime_error(
"Failed to default-initialize VcfRecord bcf1_t data structure." );
108 record_ = ::bcf_init();
110 throw std::runtime_error(
"Failed to initialize VcfRecord bcf1_t data structure." );
117 record_ = ::bcf_dup( bcf1 );
119 throw std::runtime_error(
"Failed to copy-initialize VcfRecord bcf1_t data structure." );
126 ::bcf_destroy( record_ );
128 free( info_dest_string_ );
129 free( info_dest_float_ );
130 free( info_dest_int_ );
142 if(
this == &other ) {
153 std::swap( info_dest_string_, other.info_dest_string_ );
154 std::swap( info_dest_float_, other.info_dest_float_ );
155 std::swap( info_dest_int_, other.info_dest_int_ );
156 std::swap( info_ndest_string_, other.info_ndest_string_ );
157 std::swap( info_ndest_float_, other.info_ndest_float_ );
158 std::swap( info_ndest_int_, other.info_ndest_int_ );
167 ::bcf_unpack( record_, BCF_UN_STR );
172 std::string chr = ::bcf_hdr_id2name( header_->
data(), record_->rid );
174 throw std::runtime_error(
175 "Malformed VCF file: empty chromosome name"
186 assert( record_->pos >= 0 );
187 return record_->pos + 1;
192 ::bcf_unpack( record_, BCF_UN_STR );
193 return std::string( record_->d.id );
198 auto const pos_id = std::string(
get_id() !=
"." ?
" (" +
get_id() +
")" :
"" );
206 ::bcf_unpack( record_, BCF_UN_STR );
207 assert( record_->n_allele > 0 );
208 assert( std::strlen(record_->d.allele[0]) ==
static_cast<size_t>( record_->rlen ));
209 return std::string( record_->d.allele[0] );
215 ::bcf_unpack( record_, BCF_UN_STR );
216 assert( record_->n_allele > 0 );
217 auto ret = std::vector<std::string>( record_->n_allele - 1 );
218 for(
size_t i = 1; i < record_->n_allele; ++i ) {
219 ret[ i - 1 ] = std::string( record_->d.allele[i] );
227 ::bcf_unpack( record_, BCF_UN_STR );
228 assert( record_->n_allele > 0 );
229 if( index + 1 >= record_->n_allele ) {
230 throw std::invalid_argument(
231 "Cannot retrieve alternative at index " +
std::to_string(index) +
", as the record " +
232 "line only has " +
std::to_string( record_->n_allele - 1 ) +
" alternative alleles."
235 assert( index + 1 < record_->n_allele );
236 return record_->d.allele[ index + 1 ];
243 ::bcf_unpack( record_, BCF_UN_STR );
244 assert( record_->n_allele > 0 );
245 return record_->n_allele - 1;
252 ::bcf_unpack( record_, BCF_UN_STR );
253 auto ret = std::vector<std::string>( record_->n_allele );
254 for(
size_t i = 0; i < record_->n_allele; ++i ) {
255 ret[i] = std::string( record_->d.allele[i] );
262 ::bcf_unpack( record_, BCF_UN_STR );
263 assert( record_->n_allele > 0 );
264 if( index >= record_->n_allele ) {
265 throw std::invalid_argument(
266 "Cannot retrieve variant at index " +
std::to_string(index) +
", as the record " +
267 "line only has " +
std::to_string( record_->n_allele ) +
" variants (reference + " +
268 "alternative alleles)."
271 assert( index < record_->n_allele );
272 return record_->d.allele[ index ];
277 ::bcf_unpack( record_, BCF_UN_STR );
278 assert( record_->n_allele > 0 );
279 return record_->n_allele;
284 return static_cast<VariantType>( ::bcf_get_variant_types( record_ ));
294 if( alt_index >= record_->n_allele ) {
295 throw std::runtime_error(
297 " out of bounds of the number of alleles " +
std::to_string( record_->n_allele ) +
301 return static_cast<VariantType>( ::bcf_get_variant_type( record_,
static_cast<int>( alt_index )));
306 return ::bcf_is_snp( record_ );
311 return record_->qual;
320 ::bcf_unpack( record_, BCF_UN_FLT );
321 auto ret = std::vector<std::string>();
322 for(
size_t i = 0; i < static_cast<size_t>( record_->d.n_flt ); ++i ) {
323 ret.push_back( std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.flt[i] )));
331 char* cstr =
new char[ filter.length() + 1] ;
332 std::strcpy( cstr, filter.c_str() );
335 int const res = ::bcf_has_filter( header_->
data(), record_, cstr );
340 throw std::runtime_error(
"Filter '" + filter +
"' not defined in VCF/BCF header." );
350 char pass[] =
"PASS";
351 return ::bcf_has_filter( header_->
data(), record_, pass );
360 ::bcf_unpack( record_, BCF_UN_INFO );
361 auto ret = std::vector<std::string>( record_->n_info );
362 for(
size_t i = 0; i < static_cast<size_t>( record_->n_info ); ++i ) {
363 ret[i] = std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.info[i].key ));
375 return ::bcf_get_info( header_->
data(), record_,
id ) !=
nullptr;
389 if( ! ::bcf_get_info( header_->
data(), record_,
id )) {
390 throw std::runtime_error(
391 "Required INFO tag " + std::string(
id ) +
" is not present in the record at " +
at()
406 auto const len = get_info_ptr_(
407 id, BCF_HT_STR,
reinterpret_cast<void**
>( &info_dest_string_ ), &info_ndest_string_
410 assert( info_ndest_string_ >= 0 );
411 assert( info_ndest_string_ >= len );
423 destination.assign(
static_cast<char*
>( info_dest_string_ ), len );
428 std::vector<double> result;
436 auto const len = get_info_ptr_(
437 id, BCF_HT_REAL,
reinterpret_cast<void**
>( &info_dest_float_ ), &info_ndest_float_
440 assert( info_ndest_float_ >= 0 );
441 assert( info_ndest_float_ >= len );
444 destination.resize( len );
445 for(
int i = 0; i < len; ++i ) {
446 destination[i] =
static_cast<double>(
static_cast<float*
>(info_dest_float_)[i] );
452 std::vector<int32_t> result;
460 auto const len = get_info_ptr_(
461 id, BCF_HT_INT,
reinterpret_cast<void**
>( &info_dest_int_ ), &info_ndest_int_
464 assert( info_ndest_int_ >= 0 );
465 assert( info_ndest_int_ >= len );
468 destination.resize( len );
469 for(
int i = 0; i < len; ++i ) {
470 destination[i] =
static_cast<int32_t*
>(info_dest_int_)[i];
477 return get_info_ptr_(
id, BCF_HT_FLAG,
nullptr,
nullptr );
486 ::bcf_unpack( record_, BCF_UN_FMT );
487 auto ret = std::vector<std::string>( record_->n_fmt );
488 for(
size_t i = 0; i < static_cast<size_t>( record_->n_fmt ); ++i ) {
489 ret[i] = std::string( bcf_hdr_int2id( header_->
data(), BCF_DT_ID, record_->d.fmt[i].id ));
502 return ::bcf_get_fmt( header_->
data(), record_,
id ) !=
nullptr;
513 if( ! ::bcf_get_fmt( header_->
data(), record_,
id )) {
514 throw std::runtime_error(
515 "Required FORMAT tag " + std::string(
id ) +
" is not present in the record at " +
at()
552 std::string
const&
id
571 std::string
const&
id
590 std::string
const&
id
604 bool const good = ( ::bcf_read1( source.
data(), header_->
data(), record_ ) == 0 );
615 int VcfRecord::get_info_ptr_( std::string
const&
id,
int ht_type,
void** dest,
int* ndest)
const
620 int const len = ::bcf_get_info_values( header_->
data(), record_,
id.c_str(), dest, ndest, ht_type );
621 VcfHeader::check_value_return_code_( header_->
data(),
id, ht_type, BCF_HL_INFO, len );
624 assert( !ndest || ( *ndest >= 0 && *ndest >= len ));
631 #endif // htslib guard