46 #include <unordered_set>
50 namespace population {
// Helper template (type parameters T and R) that sets up a shared T instance
// for a file, with two alternative ways of selecting samples: a list of sample
// indices (optionally inverted via inverse_sample_indices), or a boolean
// per-sample filter. The assertion below requires at least one of the two
// selectors to be empty, so they cannot both be used in the same call.
// NOTE(review): this excerpt is discontinuous; the function name, remaining
// parameters (R is presumably the reader type — confirm), and several
// statements are not visible here.
66 template<
class T,
class R>
68 std::string
const& filename,
70 std::vector<size_t>
const& sample_indices,
71 bool inverse_sample_indices,
72 std::vector<bool>
const& sample_filter
76 assert( sample_indices.empty() || sample_filter.empty() );
78 std::shared_ptr<T> input;
79 if( ! sample_indices.empty() ) {
// Sample count as reported through the input (via (*input)->samples).
// NOTE(review): `input` must already have been assigned in lines not shown.
86 auto const smp_cnt = (*input)->samples.size();
// The largest requested index has to refer to an existing sample; otherwise
// std::invalid_argument is thrown with a message naming the file.
90 auto max_it = std::max_element( sample_indices.begin(), sample_indices.end() );
91 if( *max_it + 1 > smp_cnt ) {
92 throw std::invalid_argument(
93 "In " + filename +
": "
94 "Cannot create sample filter for the input file, as the filter index list contains "
96 "while the input file only contains " +
std::to_string( smp_cnt ) +
" samples."
// Index-based selection: the inverse flag is evaluated before (re)creating
// the input. Constructor arguments are not visible in this excerpt.
102 if( inverse_sample_indices ) {
105 input = std::make_shared<T>(
111 }
// Boolean-filter selection.
else if( ! sample_filter.empty() ) {
112 input = std::make_shared<T>(
// Remaining case; presumably no sample selection at all — confirm.
118 input = std::make_shared<T>(
// Builds a list of strings: storage for `size` entries is reserved up front,
// then the loop visits indices 0 .. size-1.
// NOTE(review): the loop body and the origin of `size` are not visible here.
135 std::vector<std::string> result;
136 result.reserve( size );
137 for(
size_t i = 0; i < size; ++i ) {
// Source backed by an in-memory std::vector<Variant> passed by const reference.
// For a non-empty vector, begin/end iterators are taken and captured by value
// in a mutable lambda that receives the Variant to fill by reference.
// NOTE(review): the lambda stores iterators into `variants`, so presumably the
// caller must keep the vector alive and unmodified while iterating — confirm.
148 std::vector<Variant>
const& variants
156 if( ! variants.empty() ) {
161 auto cur = variants.begin();
162 auto end = variants.end();
167 [ cur, end ](
Variant& variant )
mutable {
// The code between this guard and the matching #endif is only compiled when
// GENESIS_HTSLIB is defined.
186 #ifdef GENESIS_HTSLIB
// Sets up a Variant source for a SAM-type file: a shared SamVariantInputStream
// is created from `reader`, pointed at `filename`, and begin/end iterators are
// obtained from it.
// NOTE(review): function name and the remaining parameters are not visible.
189 std::string
const& filename,
195 auto input = std::make_shared<SamVariantInputStream>( reader );
196 input->input_file( filename );
204 auto cur = input->begin();
205 auto end = input->end();
// At this point the iterator is expected to report at most one sample; the
// single-sample case gets dedicated handling (body not visible here).
228 assert( cur.sample_size() <= 1 );
229 if( cur.sample_size() == 1 ) {
// The shared_ptr is captured along with the iterators, so the lambda shares
// ownership of the stream it iterates over.
239 [ input, cur, end ](
Variant& variant )
mutable {
// Hand the current element over to the caller-provided Variant by move.
241 variant = std::move( *cur );
252 #endif // GENESIS_HTSLIB
// Implementation taking a file name and both kinds of sample selection
// (index list with optional inversion, or boolean filter). These are forwarded
// unchanged, together with `reader`, to a call whose name is not visible here.
// NOTE(review): the extension list suggests (m)pileup files — confirm.
262 std::string
const& filename,
264 std::vector<size_t>
const& sample_indices,
265 bool inverse_sample_indices,
266 std::vector<bool>
const& sample_filter
272 filename, reader, sample_indices, inverse_sample_indices, sample_filter
// File name plus a list of extensions; presumably used to derive a source
// name by stripping these suffixes — the callee is not visible, confirm.
279 filename, {
".gz",
".plp",
".mplp",
".pileup",
".mpileup" }
// Mutable lambda with explicit bool return type; it captures the shared input
// by value, so it shares ownership of it.
288 [ input ](
Variant& variant )
mutable ->
bool {
// Move the element referenced by `it` (declared in lines not shown) into the
// caller-provided Variant.
291 variant = std::move( *it );
// Overload without sample selection: forwards the file name and reader with an
// empty index list, `false` for the inverse flag, and an empty boolean filter.
// NOTE(review): the callee name is not visible in this excerpt.
303 std::string
const& filename,
307 filename, reader, std::vector<size_t>{},
false, std::vector<bool>{}
// Overload selecting samples by index: forwards sample_indices and
// inverse_sample_indices as given, with an empty boolean filter.
// NOTE(review): the callee name is not visible in this excerpt.
312 std::string
const& filename,
313 std::vector<size_t>
const& sample_indices,
314 bool inverse_sample_indices,
318 filename, reader, sample_indices, inverse_sample_indices, std::vector<bool>{}
// Overload selecting samples by boolean filter: forwards sample_filter as
// given, with an empty index list and `false` for the inverse flag.
// NOTE(review): the callee name is not visible in this excerpt.
323 std::string
const& filename,
324 std::vector<bool>
const& sample_filter,
328 filename, reader, std::vector<size_t>{},
false, sample_filter
// Implementation for input read with a SyncReader: forwards the file name, a
// default-constructed SyncReader, and both kinds of sample selection to a call
// whose name is not visible in this excerpt.
337 std::string
const& filename,
338 std::vector<size_t>
const& sample_indices,
339 bool inverse_sample_indices,
340 std::vector<bool>
const& sample_filter
347 filename,
SyncReader(), sample_indices, inverse_sample_indices, sample_filter
// Sample names reported by the input are only copied into `data` when the
// input provides at least one; otherwise this assignment is skipped.
356 if( input->get_sample_names().size() > 0 ) {
358 data.sample_names = input->get_sample_names();
// The lambda captures the shared input by value, so it shares ownership of it.
367 [ input ](
Variant& variant )
mutable {
368 auto& sync_it = *input;
// Move the current element into the caller-provided Variant.
370 variant = std::move( *sync_it );
// Overload without sample selection: forwards the file name with an empty
// index list, `false` for the inverse flag, and an empty boolean filter.
// NOTE(review): the callee name is not visible in this excerpt.
382 std::string
const& filename
385 filename, std::vector<size_t>{},
false, std::vector<bool>{}
// Overload selecting samples by index: forwards sample_indices and
// inverse_sample_indices as given, with an empty boolean filter.
// NOTE(review): the callee name is not visible in this excerpt.
390 std::string
const& filename,
391 std::vector<size_t>
const& sample_indices,
392 bool inverse_sample_indices
395 filename, sample_indices, inverse_sample_indices, std::vector<bool>{}
// Overload selecting samples by boolean filter: forwards sample_filter as
// given, with an empty index list and `false` for the inverse flag.
// NOTE(review): the callee name is not visible in this excerpt.
400 std::string
const& filename,
401 std::vector<bool>
const& sample_filter
404 filename, std::vector<size_t>{},
false, sample_filter
// Overload without a sample name filter: forwards the file name with an empty
// name list and `false` for the inverse flag, followed by separator_char and
// reader. NOTE(review): the callee name and the declarations of separator_char
// and reader are not visible in this excerpt.
413 std::string
const& filename,
418 filename, std::vector<std::string>{},
false, separator_char, reader
// Sets up a Variant source reading through a FrequencyTableInputStream, with
// an optional filter on sample names.
// NOTE(review): function name and the remaining parameters (separator_char,
// reader) are not visible in this excerpt.
423 std::string
const& filename,
424 std::vector<std::string>
const& sample_names_filter,
425 bool inverse_sample_names_filter,
432 auto input = std::make_shared<FrequencyTableInputStream>( reader );
// The name list is handed to the stream as an unordered_set built from the
// vector's range, so duplicate names in the list collapse into one entry.
434 input->sample_names_filter(
435 std::unordered_set<std::string>( sample_names_filter.begin(), sample_names_filter.end() )
437 input->inverse_sample_names_filter( inverse_sample_names_filter );
438 input->separator_char( separator_char );
444 auto cur = input->begin();
445 auto end = input->end();
// File name plus a list of extensions; presumably used to derive a source
// name by stripping these suffixes — the callee is not visible, confirm.
451 filename, {
".csv",
".csv.gz",
".tsv",
".tsv.gz",
".txt" }
// The shared_ptr is captured along with the iterators, so the lambda shares
// ownership of the stream it iterates over.
460 [ input, cur, end ](
Variant& variant )
mutable {
// Hand the current element over to the caller-provided Variant by move.
465 variant = std::move( *cur );
// The code from this guard to the matching #endif is only compiled when
// GENESIS_HTSLIB is defined.
481 #ifdef GENESIS_HTSLIB
// Implementation that iterates a VCF file through a shared VcfInputStream.
// NOTE(review): function name and the parameters between `filename` and
// `use_allelic_depth` (the lambda below captures `params` and `pool_samples`)
// are not visible in this excerpt.
487 std::string
const& filename,
490 bool use_allelic_depth
// Fixed to false here. NOTE(review): presumably passed to the stream
// constructor below, whose arguments are not visible — confirm.
493 const bool expect_ordered =
false;
498 auto input = std::make_shared<VcfInputStream>(
// Per its message, this error covers a file that lacks the "AD" FORMAT field
// although allelic depths were requested; the guarding condition is not shown.
505 throw std::runtime_error(
506 "Cannot iterate over VCF file " + filename +
" using the \"AD\" FORMAT " +
507 "field to count allelic depths, as that field is not part of the VCF file."
// The lambda owns copies of the flags and params and shares ownership of the
// stream through the captured shared_ptr.
520 [ input, pool_samples, use_allelic_depth, params ]
522 auto& vcf_it = *input;
// Scan forward over records. Three conditions are tested per record: missing
// "AD" while allelic depth is requested, not a SNP-or-alt-deletion while
// pooling samples, and not a SNP while params.only_snps is set.
// NOTE(review): the branch bodies are not visible; presumably such records
// are skipped — confirm.
528 for( ; vcf_it; ++vcf_it ) {
529 if( use_allelic_depth && ! vcf_it->has_format(
"AD" ) ) {
532 if( pool_samples && ! vcf_it->is_snp_or_alt_del() ) {
535 if( params.
only_snps && ! vcf_it->is_snp() ) {
// Debug-build assertions restating two of the conditions checked in the loop.
549 assert( ! use_allelic_depth || vcf_it->has_format(
"AD" ) );
550 assert( ! params.
only_snps || vcf_it->is_snp() );
// Records that do not pass the VCF filter get separate handling (not shown).
566 if( ! vcf_it->pass_filter() ) {
// Wrapper forwarding the file name and params with both boolean arguments set
// to true. NOTE(review): presumably these are pool_samples and
// use_allelic_depth, in that order; the callee name is not visible — confirm.
583 std::string
const& filename,
587 filename, params,
true,
true
// Wrapper forwarding the file name and params with the first boolean argument
// fixed to false and the caller's use_allelic_depth passed through.
// NOTE(review): the first boolean is presumably pool_samples; the callee name
// is not visible — confirm.
592 std::string
const& filename,
594 bool use_allelic_depth
597 filename, params,
false, use_allelic_depth
601 #endif // GENESIS_HTSLIB