A library for working with phylogenetic and population genetic data.
v0.32.0
fst_cathedral.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FUNCTION_FST_CATHEDRAL_H_
2 #define GENESIS_POPULATION_FUNCTION_FST_CATHEDRAL_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
41 
42 #include <cassert>
43 #include <cmath>
44 #include <stdexcept>
45 #include <string>
46 #include <vector>
47 
48 // =================================================================================================
49 // Forward declacations
50 // =================================================================================================
51 
52 namespace genesis {
53 namespace utils {
54 
55  class JsonDocument;
56 
57 } // namespace utils
58 } // namespace genesis
59 
60 namespace genesis {
61 namespace population {
62 
63 // =================================================================================================
64 // Fst Cathedral Plot Record
65 // =================================================================================================
66 
75 {
76  struct Entry
77  {
78  Entry( size_t pos_, double pi_w_, double pi_b_, double pi_t_ )
79  : position( pos_ )
80  , pi_within( pi_w_ )
81  , pi_between( pi_b_ )
82  , pi_total( pi_t_ )
83  {}
84 
85  size_t position = 0;
86  double pi_within = 0.0;
87  double pi_between = 0.0;
88  double pi_total = 0.0;
89  };
90 
91  // The actual components of FST values per position.
92  std::vector<Entry> entries;
93 
94  // Data-derived properties.
95  std::string sample_name_1;
96  std::string sample_name_2;
97 
98  // User-provided properties.
99  // Type of accumulator. We store all three pi values here independently though,
100  // to keep it simple, but use this to know what estimator was used for the data.
102 };
103 
104 // =================================================================================================
105 // Fst Cathedral Accumulator
106 // =================================================================================================
107 
112 {
113 public:
114 
115  // -------------------------------------------------------------------------
116  // Constructors and Rule of Five
117  // -------------------------------------------------------------------------
118 
121  )
122  : fst_estimator_( fst_estimator )
123  {}
124 
125  ~FstCathedralAccumulator() = default;
126 
129 
132 
133  // -------------------------------------------------------------------------
134  // Accumulator Functions
135  // -------------------------------------------------------------------------
136 
137  void accumulate( FstCathedralPlotRecord::Entry const& entry );
138  void dissipate( FstCathedralPlotRecord::Entry const& entry );
139  double aggregate() const;
140  void reset();
141 
142  // -------------------------------------------------------------------------
143  // Private Member Variables
144  // -------------------------------------------------------------------------
145 
146 private:
147 
148  // Type of accumulator.
150 
151  // Store our accumualted values. We are using a Neumaier summation here,
152  // as we might be adding and subtracting values of different orders of magnitude,
153  // which would lead to large errors with the standard Kahan sum.
154  utils::NeumaierSum pi_within_sum_ = 0.0;
155  utils::NeumaierSum pi_between_sum_ = 0.0;
156  utils::NeumaierSum pi_total_sum_ = 0.0;
157  size_t value_count_ = 0;
158 };
159 
160 // =================================================================================================
161 // Fst Cathedral Functions
162 // =================================================================================================
163 
182 std::vector<FstCathedralPlotRecord> compute_fst_cathedral_records_for_chromosome(
184  FstPoolProcessor& processor,
186  std::vector<std::string> const& sample_names = std::vector<std::string>{},
187  std::shared_ptr<genesis::sequence::SequenceDict> const& sequence_dict = nullptr
188 );
189 
201 std::vector<FstCathedralPlotRecord> compute_fst_cathedral_records(
202  VariantInputStream& iterator,
203  FstPoolProcessor& processor,
205  std::vector<std::string> const& sample_names = std::vector<std::string>{},
206  std::shared_ptr<genesis::sequence::SequenceDict> const& sequence_dict = nullptr
207 );
208 
217  CathedralPlotParameters const& parameters,
218  FstCathedralPlotRecord& record
219 ) {
220  auto accu = FstCathedralAccumulator( record.fst_estimator );
221  return compute_cathedral_matrix<FstCathedralPlotRecord, FstCathedralAccumulator>(
222  parameters, record, accu
223  );
224 }
225 
226 // =================================================================================================
227 // Storage Functions
228 // =================================================================================================
229 
238  FstCathedralPlotRecord const& record
239 );
240 
241 } // namespace population
242 } // namespace genesis
243 
244 #endif // include guard
genesis::population::compute_fst_cathedral_matrix
void compute_fst_cathedral_matrix(CathedralPlotParameters const &parameters, FstCathedralPlotRecord &record)
Compute the matrix of values that represents the cathedral plot for FST.
Definition: fst_cathedral.hpp:216
genesis::utils::CompensatedSum
Compensated summation algorithmm, such as Kahan, Neumaier, and Klein summation.
Definition: compensated_sum.hpp:49
fst_pool_processor.hpp
genesis::population::FstCathedralPlotRecord::entries
std::vector< Entry > entries
Definition: fst_cathedral.hpp:92
genesis::population::compute_fst_cathedral_records_for_chromosome
std::vector< FstCathedralPlotRecord > compute_fst_cathedral_records_for_chromosome(VariantInputStream::Iterator &iterator, FstPoolProcessor &processor, FstPoolCalculatorUnbiased::Estimator fst_estimator, std::vector< std::string > const &sample_names, std::shared_ptr< genesis::sequence::SequenceDict > const &sequence_dict)
Compute the components of per-position FST data for all pairs of samples in the given processor,...
Definition: fst_cathedral.cpp:219
genesis::population::FstCathedralPlotRecord::Entry::pi_between
double pi_between
Definition: fst_cathedral.hpp:87
genesis::population::FstCathedralPlotRecord::Entry::Entry
Entry(size_t pos_, double pi_w_, double pi_b_, double pi_t_)
Definition: fst_cathedral.hpp:78
genesis::population::FstCathedralAccumulator::dissipate
void dissipate(FstCathedralPlotRecord::Entry const &entry)
Definition: fst_cathedral.cpp:68
genesis::population::FstCathedralPlotRecord::sample_name_2
std::string sample_name_2
Definition: fst_cathedral.hpp:96
genesis::utils::JsonDocument
Store a Json value of any kind.
Definition: json/document.hpp:114
genesis::population::FstPoolCalculatorUnbiased::Estimator
Estimator
Definition: fst_pool_unbiased.hpp:90
genesis::population::fst_cathedral_plot_record_to_json_document
genesis::utils::JsonDocument fst_cathedral_plot_record_to_json_document(FstCathedralPlotRecord const &record)
Get a user-readable description of the data of a FstCathedralPlotRecord as a JsonDocument.
Definition: fst_cathedral.cpp:307
genesis::population::FstCathedralAccumulator::FstCathedralAccumulator
FstCathedralAccumulator(FstPoolCalculatorUnbiased::Estimator fst_estimator)
Definition: fst_cathedral.hpp:119
genesis::population::FstCathedralAccumulator::accumulate
void accumulate(FstCathedralPlotRecord::Entry const &entry)
Definition: fst_cathedral.cpp:57
genesis::population::CathedralPlotRecord
Collection of the data used for making for a cathedral plot.
Definition: cathedral_plot.hpp:116
genesis::population::FstCathedralAccumulator::operator=
FstCathedralAccumulator & operator=(FstCathedralAccumulator const &)=default
sequence_dict.hpp
matrix.hpp
genesis::population::FstCathedralAccumulator::reset
void reset()
Definition: fst_cathedral.cpp:114
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::FstCathedralPlotRecord::Entry::position
size_t position
Definition: fst_cathedral.hpp:85
genesis::population::FstCathedralAccumulator
Accumulate the partial pi values for a given window to produce a cathedral plot.
Definition: fst_cathedral.hpp:111
fst_pool_unbiased.hpp
genesis::population::FstCathedralPlotRecord::fst_estimator
FstPoolCalculatorUnbiased::Estimator fst_estimator
Definition: fst_cathedral.hpp:101
variant_input_stream.hpp
genesis::population::FstCathedralPlotRecord::Entry
Definition: fst_cathedral.hpp:76
genesis::population::VariantInputStream
utils::GenericInputStream< Variant, VariantInputStreamData > VariantInputStream
Iterate Variants, using a variety of input file formats.
Definition: stream/variant_input_stream.hpp:108
genesis::population::FstCathedralPlotRecord::sample_name_1
std::string sample_name_1
Definition: fst_cathedral.hpp:95
genesis::population::FstCathedralAccumulator::~FstCathedralAccumulator
~FstCathedralAccumulator()=default
genesis::population::FstCathedralPlotRecord
Data for making one FST cathedral plot, that is, one pair of samples and one chromosome.
Definition: fst_cathedral.hpp:74
genesis::population::FstCathedralPlotRecord::Entry::pi_within
double pi_within
Definition: fst_cathedral.hpp:86
genesis::population::FstCathedralPlotRecord::Entry::pi_total
double pi_total
Definition: fst_cathedral.hpp:88
genesis::population::CathedralPlotParameters
Plot parameters to make a cathedral plot.
Definition: cathedral_plot.hpp:94
compensated_sum.hpp
genesis::population::FstCathedralAccumulator::aggregate
double aggregate() const
Definition: fst_cathedral.cpp:93
cathedral_plot.hpp
genesis::utils::GenericInputStream< Variant, VariantInputStreamData >::Iterator
friend Iterator
Definition: generic_input_stream.hpp:846
genesis::population::compute_fst_cathedral_records
std::vector< FstCathedralPlotRecord > compute_fst_cathedral_records(VariantInputStream &iterator, FstPoolProcessor &processor, FstPoolCalculatorUnbiased::Estimator fst_estimator, std::vector< std::string > const &sample_names, std::shared_ptr< genesis::sequence::SequenceDict > const &sequence_dict)
Compute the components of per-position FST data for all pairs of samples in the given processor,...
Definition: fst_cathedral.cpp:275