45 #include <unordered_set>
49 namespace population {
// NOTE(review): only scattered lines of the enclosing function are visible in this excerpt;
// its signature and the statements in between are not shown here.
// Construct a shared_ptr-owned VariantParallelInputStream from parallel_input. The lambda
// capture list further down takes this pointer by value.
60 auto input = std::make_shared<VariantParallelInputStream>( parallel_input );
// Starts out false; captured by value in the lambda capture list below.
65 bool has_started =
false;
// Set of all sample names encountered so far across the inputs, used to detect repeats.
70 std::unordered_set<std::string> uniq_names;
// Walk over every input source of the parallel stream ...
71 for(
auto const& source : input->inputs() ) {
// ... and over every sample name that the source reports in its data.
77 for(
auto const& sample_name : source.data().sample_names ) {
// A name that was already recorded is an error: throw std::runtime_error naming the sample.
78 if( uniq_names.count( sample_name ) > 0 ) {
79 throw std::runtime_error(
80 "Cannot iterate input sources in parallel, as sample name \"" + sample_name +
81 "\" occurs multiple times in the inputs."
// Otherwise remember the name for the checks of subsequent samples.
84 uniq_names.insert( sample_name );
// Local copy of joined_variant_params, so that the lambda can capture it by value.
92 auto params = joined_variant_params;
// Lambda capture list: everything is captured by value.
// NOTE(review): the declarations of cur and end are not visible in this excerpt.
97 [ input, cur, end, has_started, params ]
// Obtain the begin iterator of the shared input.
// NOTE(review): presumably guarded by a has_started check whose line is not visible here - confirm.
101 cur = input->begin();
// NOTE(review): only scattered lines of the enclosing function are visible in this excerpt;
// its signature and the statements in between are not shown here.
// Construct a shared_ptr-owned VariantGaplessInputStream from gapless_input. The lambda
// capture list below takes this pointer by value.
125 auto input = std::make_shared<VariantGaplessInputStream>( gapless_input );
// Starts out false; tested inside the lambda to detect its first invocation.
130 bool has_started =
false;
// Lambda capture list: everything is captured by value.
// NOTE(review): the declarations of cur and end are not visible in this excerpt.
135 [ input, cur, end, has_started ]
// While has_started is still false, fetch the begin iterator of the input.
137 if( ! has_started ) {
139 cur = input->begin();
// Move the element that cur currently points to into variant.
// NOTE(review): variant is presumably the lambda's output parameter - its declaration is not visible here.
144 variant = std::move( *cur );
// Accesses data() of the stream returned by input->input().
// NOTE(review): the expression that this line is part of is not visible in this excerpt.
151 input->input().data()
// Result of assigning the samples of an input to groups, as filled in by the grouping code
// further down in this file.
// NOTE(review): the braces of this struct are not part of this excerpt.
162 struct VariantMergeGroupAssignment
// One entry per input sample: the index (into group_names) of the group the sample belongs to.
167 std::vector<size_t> group_assignments;
// Names of the groups. A name is appended the first time its group is encountered, so the
// position of a name in this vector is the group index stored in group_assignments.
174 std::vector<std::string> group_names;
// NOTE(review): fragment - the function name and its first parameter are outside this excerpt.
// sample_name_to_group: looked up by sample name below to obtain that sample's group name.
182 std::unordered_map<std::string, std::string>
const& sample_name_to_group,
// allow_ungrouped_samples: when true, a sample that is absent from the mapping becomes a
// group of its own, named after the sample.
183 bool allow_ungrouped_samples
// Sample names as reported by the data of the input.
186 auto const& sample_names = variant_input.
data().sample_names;
// Without any sample names there is nothing to match against the mapping: throw.
187 if( sample_names.size() == 0 ) {
188 throw std::runtime_error(
"Cannot merge sample groups if no sample names are provided" );
// Result object; group_assignments gets one slot per input sample.
193 VariantMergeGroupAssignment grouping;
194 grouping.group_assignments = std::vector<size_t>( sample_names.size() );
// Maps each group name to its index in grouping.group_names.
195 std::unordered_map<std::string, size_t> group_to_index;
// Sample names seen so far, for duplicate detection and for the warning at the end.
196 std::unordered_set<std::string> uniq_sample_names;
// Process the samples in input order; i is also the index into group_assignments.
199 for(
size_t i = 0; i < sample_names.size(); ++i ) {
200 auto const& sample_name = sample_names[i];
// Empty sample names are rejected with std::runtime_error.
201 if( sample_name.empty() ) {
202 throw std::runtime_error(
"Cannot merge sample groups with empty sample names." );
// A sample name that was already seen is rejected with std::runtime_error.
207 if( uniq_sample_names.count( sample_name ) > 0 ) {
208 throw std::runtime_error(
209 "Cannot merge sample groups with duplicate sample names. Sample name \"" +
210 sample_name +
"\" occurs multiple times in the input."
213 uniq_sample_names.insert( sample_name );
// Determine the group name for this sample.
220 std::string group_name;
// Case 1: the sample is listed in the mapping; use the mapped group name, which must not be empty.
221 if( sample_name_to_group.count( sample_name ) > 0 ) {
222 group_name = sample_name_to_group.at( sample_name );
223 if( group_name.empty() ) {
224 throw std::runtime_error(
225 "Cannot merge sample groups, as sample name \"" + sample_name +
"\" has an " +
226 "empty group name assigned in the provided mapping of sample names to group names."
229 }
else if( allow_ungrouped_samples ) {
// Case 2: not in the mapping, but ungrouped samples are allowed; the sample name doubles as group name.
230 group_name = sample_name;
// NOTE(review): the branch header for this throw is not visible in this excerpt; presumably it
// is the final else (sample not in the mapping and ungrouped samples not allowed) - confirm.
232 throw std::runtime_error(
233 "Cannot merge sample groups, as sample name \"" + sample_name +
234 "\" does not occur in the provided mapping of sample names to group names."
237 assert( !group_name.empty() );
// First time this group name is seen: give it the next free index (the current number of
// known groups) and append its name to the result.
242 if( group_to_index.count( group_name ) == 0 ) {
243 auto const next_idx = group_to_index.size();
244 group_to_index[ group_name ] = next_idx;
245 grouping.group_names.push_back( group_name );
// Record for sample i the index of its group.
247 auto const group_idx = group_to_index.at( group_name );
248 assert( group_idx < group_to_index.size() );
249 assert( group_idx < grouping.group_names.size() );
250 grouping.group_assignments[i] = group_idx;
// Consistency checks: at least one group exists, names and indices are in sync, and every
// sample name was unique.
252 assert( grouping.group_names.size() > 0 );
253 assert( grouping.group_names.size() == group_to_index.size() );
254 assert( uniq_sample_names.size() == sample_names.size() );
// Collect the sample names that appear as keys in the mapping but not among the input samples.
259 std::unordered_set<std::string> samples_names_to_warn;
260 for(
auto const& ng : sample_name_to_group ) {
261 if( uniq_sample_names.count( ng.first ) == 0 ) {
262 samples_names_to_warn.insert( ng.first );
// If there are any such names, report them via LOG_WARN.
// NOTE(review): parts of the logged expression are not visible in this excerpt.
265 if( !samples_names_to_warn.empty() ) {
266 LOG_WARN <<
"In the provided list of samples to merge into groups, there were "
268 <<
" sample names that did not occur in the input sample names:\n"
// NOTE(review): fragment - the function name, its first parameter and any parameters after
// allow_ungrouped_samples are outside this excerpt.
278 std::unordered_map<std::string, std::string>
const& sample_name_to_group,
279 bool allow_ungrouped_samples,
// Argument list of a call whose callee and result variable are not visible here.
// NOTE(review): presumably this computes the grouping object used below - confirm.
287 variant_input, sample_name_to_group, allow_ungrouped_samples
// Construct a shared_ptr-owned VariantInputStream from variant_input. The lambda capture
// list below takes this pointer by value.
291 auto input = std::make_shared<VariantInputStream>( variant_input );
// Starts out false; tested inside the lambda to detect its first invocation.
296 bool has_started =
false;
// Copy the data of the input and replace its sample names by the group names.
299 auto data = variant_input.
data();
300 data.sample_names = grouping.group_names;
// Mutable lambda with by-value captures; it receives the Variant to fill by reference.
// NOTE(review): the declarations of cur, end and filter_policy are not visible in this excerpt.
305 [ input, cur, end, has_started, grouping, filter_policy ](
Variant& variant )
mutable {
// While has_started is still false, fetch the begin iterator of the input.
306 if( ! has_started ) {
308 cur = input->begin();
// Work on the element that cur currently points to, and remember how many samples it has.
326 auto& cur_var = *cur;
327 auto const sample_count = cur_var.samples.size();
// Temporarily move the samples out of cur_var ...
328 auto tmp_samples = std::move( cur_var.samples );
// ... and move them back in, so that cur_var ends up with the same number of samples.
// NOTE(review): the statement between these two moves is not visible in this excerpt;
// presumably the remaining fields of cur_var are transferred to variant there - confirm.
330 cur_var.
samples = std::move( tmp_samples );
331 assert( cur_var.samples.size() == sample_count );
// The number of samples in the input has to equal the number of group assignments; throw otherwise.
334 if( sample_count != grouping.group_assignments.size() ) {
335 throw std::runtime_error(
336 "Based on sample names and groups, " +
338 " samples are expected to be found in the input, but " +
// Reset the output samples to one default-constructed SampleCounts per group.
347 variant.
samples = std::vector<SampleCounts>( grouping.group_names.size() );
// For each input sample, look up the index of the group it was assigned to.
348 for(
size_t i = 0; i < sample_count; ++i ) {
349 auto const group_idx = grouping.group_assignments[i];
// Runtime bounds check of the group index against the output samples; throws if out of range.
354 if( group_idx >= variant.
samples.size() ) {
355 throw std::runtime_error(
// Tests whether the status of input sample i is not passing.
// NOTE(review): the condition that this line is part of is not visible in this excerpt;
// presumably it is combined with filter_policy - confirm.
365 ! cur_var.samples[i].status.passing()