A library for working with phylogenetic and population genetic data.
v0.32.0
sample_counts_filter_positional.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FILTER_SAMPLE_COUNTS_FILTER_POSITIONAL_H_
2 #define GENESIS_POPULATION_FILTER_SAMPLE_COUNTS_FILTER_POSITIONAL_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
40 
41 #include <functional>
42 #include <memory>
43 #include <stdexcept>
44 #include <string>
45 #include <utility>
46 #include <vector>
47 
48 namespace genesis {
49 namespace population {
50 
51 // =================================================================================================
52 // Filter by region, tagging
53 // =================================================================================================
54 
/**
 * @brief Build a per-sample mask filter that can be applied to each Variant of a stream.
 *
 * The returned callable captures @p sample_masks, the tag, and @p complement by value and,
 * when invoked on a Variant:
 *  - throws std::invalid_argument if the number of masks differs from the number of samples
 *    in the Variant (this check runs first, so it also applies to non-passing Variants);
 *  - returns without changes if the Variant status is not passing;
 *  - otherwise visits each sample, skipping those whose mask pointer is null and those whose
 *    own status is not passing, and sets the tag on the sample status whenever
 *    `complement ^ is_covered( mask, variant )` is false.
 *
 * @tparam GenomeMaskType Mask type; it has to be usable as the first argument of
 *         `is_covered( mask, variant )`.
 * @param sample_masks One (possibly null) shared pointer to a mask per sample. The vector is
 *        copied into the returned callable.
 * @param complement If false (default), samples are tagged where is_covered() returns false;
 *        if true, they are tagged where it returns true.
 * @throws std::invalid_argument if the tag is rejected by the check at the start of the
 *         function; per the error message, only SampleCountsFilterTag::kMaskedPosition and
 *         SampleCountsFilterTag::kMaskedRegion are meant to be accepted.
 *
 * NOTE(review): The function name with its return type, the `tag` parameter, and the condition
 * of the tag check (original lines 66, 68, 73, 74) are not visible in this listing. The
 * symbol index gives the signature as `std::function< void(Variant &)>
 * make_sample_counts_filter_by_region_tagging( sample_masks, SampleCountsFilterTag tag,
 * complement = false )` - confirm against the actual header.
 */
65 template<class GenomeMaskType>
67  std::vector<std::shared_ptr<GenomeMaskType>> const& sample_masks,
69  bool complement = false
70 ) {
71  // Only allow mask filter tags here.
72  if(
75  ) {
76  throw std::invalid_argument(
77  "Can only use SampleCountsFilterTag::kMaskedPosition or "
78  "SampleCountsFilterTag::kMaskedRegion as tags for "
79  "make_sample_counts_filter_by_region_tagging()."
80  );
81  }
82 
83  // Make a filter function that can be applied as a transformation to a VariantInputStream.
84  // This makes a copy of the masks as well, so that the shared pointers stay alive.
85  return [sample_masks, tag, complement]( Variant& variant )
86  {
87  // Make sure that there is a mask entry for each sample, and that the Variant is passing.
88  // We need to check this in every call of this function, in case the Variants differ
89  // in their number of samples (which would most likely indicate a bug elsewhere).
90  if( sample_masks.size() != variant.samples.size() ) {
91  throw std::invalid_argument(
92  "Inconsistent number of samples, with make_sample_counts_filter_by_region_tagging() "
93  "using " + std::to_string( sample_masks.size() ) + " sample masks, but Variant "
94  "has " + std::to_string( variant.samples.size() ) + " samples present."
95  );
96  }
97  if( ! variant.status.passing() ) {
98  return;
99  }
100 
101  // Apply all per-sample masks. There might be some nullptr masks there,
102  // if particular samples do not have a mask assigned, so we check and skip then.
103  for( size_t i = 0; i < sample_masks.size(); ++i ) {
104  if( ! sample_masks[i] ) {
105  continue;
106  }
107  if( ! variant.samples[i].status.passing() ) {
108  continue;
109  }
110  auto const& regions = *sample_masks[i];
 // XOR: with complement set, the outcome of is_covered() is inverted before deciding.
111  auto const keep = complement ^ is_covered( regions, variant );
112  if( ! keep ) {
113  variant.samples[i].status.set( tag );
114  }
115  }
116  };
117 }
118 
119 } // namespace population
120 } // namespace genesis
121 
122 #endif // include guard
genome_region.hpp
genesis::population::make_sample_counts_filter_by_region_tagging
std::function< void(Variant &)> make_sample_counts_filter_by_region_tagging(std::vector< std::shared_ptr< GenomeMaskType >> const &sample_masks, SampleCountsFilterTag tag, bool complement=false)
Filter function to be used with VariantInputStream on a Variant to filter its SampleCounts by genome ...
Definition: sample_counts_filter_positional.hpp:66
genesis::population::SampleCountsFilterTag::kMaskedRegion
@ kMaskedRegion
Position is part of a masked region.
genesis::population::is_covered
bool is_covered(GenomeRegion const &region, std::string const &chromosome, size_t position)
Test whether the chromosome/position is within a given genomic region.
Definition: genome_region.cpp:207
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
sample_counts_filter.hpp
genome_region.hpp
sample_counts.hpp
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genome_locus_set.hpp
variant.hpp
genesis::population::SampleCountsFilterTag
SampleCountsFilterTag
Definition: sample_counts_filter.hpp:54
genesis::population::SampleCountsFilterTag::kMaskedPosition
@ kMaskedPosition
Position has been masked out from processing.