45 namespace population {
// NOTE(review): fragment of a function template over T and R. Its name, return type, and several
// body lines are not visible in this chunk; the comments below describe only the visible lines.
// Visible parameters: the input file name, a list of sample indices, a flag to invert that list,
// and a boolean per-sample filter.
61 template<
class T,
class R>
63 std::string
const& filename,
65 std::vector<size_t>
const& sample_indices,
66 bool inverse_sample_indices,
67 std::vector<bool>
const& sample_filter
// The index list and the boolean filter are alternatives: at most one of them may be non-empty.
71 assert( sample_indices.empty() || sample_filter.empty() );
73 std::shared_ptr<T> input;
// Case 1: a list of sample indices was given.
74 if( ! sample_indices.empty() ) {
// Number of samples of the element that the input currently points to.
// NOTE(review): `input` has to be assigned before this line; that assignment is presumably
// in lines that are elided from this view - confirm.
81 auto const smp_cnt = (*input)->samples.size();
// Validate the largest requested index against the sample count. The list is non-empty in this
// branch, so the iterator returned by max_element can be dereferenced.
85 auto max_it = std::max_element( sample_indices.begin(), sample_indices.end() );
86 if( *max_it + 1 > smp_cnt ) {
87 throw std::invalid_argument(
88 "In " + filename +
": "
89 "Cannot create sample filter for the input file, as the filter index list contains "
91 "while the input file only contains " +
std::to_string( smp_cnt ) +
" samples."
// NOTE(review): the handling of inverted index lists is not visible in this chunk.
97 if( inverse_sample_indices ) {
100 input = std::make_shared<T>(
106 }
else if( ! sample_filter.empty() ) {
// Case 2: a boolean sample filter was given.
107 input = std::make_shared<T>(
// NOTE(review): presumably the remaining case (neither index list nor filter given);
// the enclosing branch is not visible here.
113 input = std::make_shared<T>(
127 #ifdef GENESIS_HTSLIB
// NOTE(review): fragment of a function that takes a file name and uses a `reader` whose
// declaration is not visible in this chunk.
130 std::string
const& filename,
// Create a shared SamVariantInputIterator from the reader and set its input file.
136 auto input = std::make_shared<SamVariantInputIterator>( reader );
137 input->input_file( filename );
// Obtain the begin and end iterators of the input once; both are captured by the lambda below.
142 auto cur = input->begin();
143 auto end = input->end();
// Loop over the number of samples reported by the begin iterator.
// NOTE(review): the loop body is not visible here.
160 for(
size_t i = 0; i < cur.sample_size(); ++i ) {
// The lambda captures the shared input pointer and both iterators by value; it is `mutable`
// so that its captured copies can be modified inside the call.
171 [ input, cur, end ](
Variant& variant )
mutable {
// Move the element at the current iterator position into the caller-provided Variant.
173 variant = std::move( *cur );
184 #endif // GENESIS_HTSLIB
// NOTE(review): fragment of a function that takes a file name, a list of sample indices, a flag
// to invert that list, and a boolean sample filter. Its name and the declaration of `reader`
// are not visible in this chunk.
194 std::string
const& filename,
196 std::vector<size_t>
const& sample_indices,
197 bool inverse_sample_indices,
198 std::vector<bool>
const& sample_filter
// Argument list of a call that forwards the file name, reader, and all sample selection
// arguments unchanged. NOTE(review): the callee name is not visible here.
204 filename, reader, sample_indices, inverse_sample_indices, sample_filter
// Argument list passing the file name together with a list of file extensions.
// NOTE(review): presumably these extensions get stripped to derive a name - confirm callee.
211 filename, {
".gz",
".plp",
".mplp",
".pileup",
".mpileup" }
// Add one empty sample name per sample of the current input element.
216 for(
size_t i = 0; i < (*input)->samples.size(); ++i ) {
217 data.sample_names.push_back(
"" );
// The lambda captures the shared input pointer by value and returns a bool.
223 [ input ](
Variant& variant )
mutable ->
bool {
// Move the current element into the caller-provided Variant.
// NOTE(review): `it` is declared in a line that is not visible here.
226 variant = std::move( *it );
// NOTE(review): fragments of three forwarding functions; their names, the `reader` parameter
// declaration, and the callee name are not visible in this chunk.

// Variant without sample selection: forwards an empty index list, `false` for the inversion
// flag, and an empty boolean filter.
238 std::string
const& filename,
242 filename, reader, std::vector<size_t>{},
false, std::vector<bool>{}
// Variant with an index list: forwards the indices and the inversion flag, and an empty
// boolean filter.
247 std::string
const& filename,
248 std::vector<size_t>
const& sample_indices,
249 bool inverse_sample_indices,
253 filename, reader, sample_indices, inverse_sample_indices, std::vector<bool>{}
// Variant with a boolean filter: forwards an empty index list, `false` for the inversion flag,
// and the given filter.
258 std::string
const& filename,
259 std::vector<bool>
const& sample_filter,
263 filename, reader, std::vector<size_t>{},
false, sample_filter
// NOTE(review): fragment of a function that takes a file name, a list of sample indices, a flag
// to invert that list, and a boolean sample filter. Its name is not visible in this chunk.
272 std::string
const& filename,
273 std::vector<size_t>
const& sample_indices,
274 bool inverse_sample_indices,
275 std::vector<bool>
const& sample_filter
// Argument list of a call that passes a default-constructed SyncReader and forwards all sample
// selection arguments unchanged. NOTE(review): the callee name is not visible here.
282 filename,
SyncReader(), sample_indices, inverse_sample_indices, sample_filter
// Add one empty sample name per sample of the current input element.
293 for(
size_t i = 0; i < (*input)->samples.size(); ++i ) {
294 data.sample_names.push_back(
"" );
// The lambda captures the shared input pointer by value.
300 [ input ](
Variant& variant )
mutable {
// Reference to the object behind the shared pointer, to shorten the accesses below.
301 auto& sync_it = *input;
// Move the current element into the caller-provided Variant.
303 variant = std::move( *sync_it );
// NOTE(review): fragments of three forwarding functions; their names and the callee name are
// not visible in this chunk.

// Variant without sample selection: forwards an empty index list, `false` for the inversion
// flag, and an empty boolean filter.
315 std::string
const& filename
318 filename, std::vector<size_t>{},
false, std::vector<bool>{}
// Variant with an index list: forwards the indices and the inversion flag, and an empty
// boolean filter.
323 std::string
const& filename,
324 std::vector<size_t>
const& sample_indices,
325 bool inverse_sample_indices
328 filename, sample_indices, inverse_sample_indices, std::vector<bool>{}
// Variant with a boolean filter: forwards an empty index list, `false` for the inversion flag,
// and the given filter.
333 std::string
const& filename,
334 std::vector<bool>
const& sample_filter
337 filename, std::vector<size_t>{},
false, sample_filter
346 #ifdef GENESIS_HTSLIB
// NOTE(review): fragment of a function that takes a file name, a list of sample names, a flag
// to invert that list, and several boolean options. Its name and some parameters (such as
// `pool_samples` and `only_biallelic`, which are used below) are not visible in this chunk.
353 std::string
const& filename,
354 std::vector<std::string>
const& sample_names,
355 bool inverse_sample_names,
359 bool use_allelic_depth,
361 bool only_filter_pass
// Create a shared VcfInputIterator for the file, restricted by the given sample names.
366 auto input = std::make_shared<VcfInputIterator>( filename, sample_names, inverse_sample_names );
// Error raised when the "AD" FORMAT field is needed but not part of the VCF file.
// NOTE(review): the condition guarding this throw is not visible here.
371 throw std::runtime_error(
372 "Cannot iterate over VCF file " + filename +
" using the \"AD\" FORMAT " +
373 "field to count allelic depths, as that field is not part of the VCF file."
// The lambda captures the shared input pointer and all boolean options by value.
386 [ input, pool_samples, use_allelic_depth, only_biallelic, only_filter_pass ]
// Reference to the object behind the shared pointer, to shorten the accesses below.
388 auto& vcf_it = *input;
// Advance through the records while the iterator is valid, testing each record against the
// conditions below. NOTE(review): the bodies of these conditions are not visible; presumably
// records that match any of them are skipped - confirm.
393 for( ; vcf_it; ++vcf_it ) {
// Record lacks the "AD" FORMAT field or is not a SNP.
394 if( ! vcf_it->has_format(
"AD" ) || ! vcf_it->is_snp() ) {
// Record does not have exactly one alternative allele, while only biallelic ones are requested.
397 if( only_biallelic && vcf_it->get_alternatives_count() != 1 ) {
// Record does not pass the filter, while only passing ones are requested.
400 if( only_filter_pass && ! vcf_it->pass_filter() ) {
// At this point, the current record is expected to have the "AD" field and to be a SNP.
408 assert( vcf_it->has_format(
"AD" ) );
409 assert( vcf_it->is_snp() );
// NOTE(review): fragments of four forwarding functions; their names, the `only_biallelic`
// parameter declaration, and the callee names are not visible in this chunk.

// Variant without sample names: forwards an empty name list and `false` for the inversion flag.
433 std::string
const& filename,
435 bool only_filter_pass
438 filename, std::vector<std::string>{},
false,
439 only_biallelic, only_filter_pass
// Variant with sample names: forwards them, followed by two literal `true` arguments.
// NOTE(review): presumably these two are the `pool_samples` and `use_allelic_depth` options,
// judging by the order of the lambda captures elsewhere in this file - confirm against the callee.
444 std::string
const& filename,
445 std::vector<std::string>
const& sample_names,
446 bool inverse_sample_names,
448 bool only_filter_pass
451 filename, sample_names, inverse_sample_names,
452 true,
true, only_biallelic, only_filter_pass
// Variant without sample names that lets the caller choose `use_allelic_depth`: forwards an
// empty name list and `false` for the inversion flag.
457 std::string
const& filename,
458 bool use_allelic_depth,
460 bool only_filter_pass
463 filename, std::vector<std::string>{},
false,
464 use_allelic_depth, only_biallelic, only_filter_pass
// Variant with sample names that lets the caller choose `use_allelic_depth`: forwards them,
// preceded by a literal `false` argument.
// NOTE(review): presumably that `false` is the `pool_samples` option - confirm against the callee.
469 std::string
const& filename,
470 std::vector<std::string>
const& sample_names,
471 bool inverse_sample_names,
472 bool use_allelic_depth,
474 bool only_filter_pass
477 filename, sample_names, inverse_sample_names,
478 false, use_allelic_depth, only_biallelic, only_filter_pass
482 #endif // GENESIS_HTSLIB
490 bool allow_ref_base_mismatches,
491 bool allow_alt_base_mismatches,
492 std::string
const& source_sample_separator
495 auto input = std::make_shared<VariantParallelInputIterator>( parallel_input );
499 auto cur = input->begin();
500 auto end = input->end();
506 for(
auto const& source : input->inputs() ) {
507 auto const& source_name = source.data().
source_name;
508 for(
auto const& sample_name : source.data().sample_names ) {
509 data.
sample_names.push_back( source_name + source_sample_separator + sample_name );
515 [ input, cur, end, allow_ref_base_mismatches, allow_alt_base_mismatches ]
518 variant = cur.joined_variant(
519 allow_ref_base_mismatches, allow_alt_base_mismatches,
true