A library for working with phylogenetic and population genetic data.
v0.32.0
cathedral_plot.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_PLOTTING_CATHEDRAL_PLOT_H_
2 #define GENESIS_POPULATION_PLOTTING_CATHEDRAL_PLOT_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
38 
39 #include <cassert>
40 #include <cmath>
41 #include <cstdint>
42 #include <deque>
43 #include <stdexcept>
44 #include <string>
45 #include <utility>
46 #include <vector>
47 
48 // =================================================================================================
49 // Forward declacations
50 // =================================================================================================
51 
52 namespace genesis {
53 namespace utils {
54 
55  class JsonDocument;
56  class SvgDocument;
57 
58 } // namespace utils
59 } // namespace genesis
60 
61 namespace genesis {
62 namespace population {
63 
64 // =================================================================================================
65 // Cathedral Plot Parameters
66 // =================================================================================================
67 
81 {
83  kGeometric,
84  kLinear
85 };
86 
95 {
96  // Plot parameters
97  size_t width = 0;
98  size_t height = 0;
100 };
101 
117 {
118  // Virtual destructor, just in case. We are problably not going to delete the derived class
119  // through a pointer to base in our use case, but one never knows...
120  virtual ~CathedralPlotRecord() = default;
121 
122  // Data-derived parameters from the initial input.
123  std::string title;
124  std::string plot_name;
125  std::string chromosome_name;
126  size_t chromosome_length = 0;
127 
128  // User-provided parameters, added here to keep track of the record.
130 
131  // Data matrix containing per-pixel values.
133  std::vector<double> window_widths;
134 };
135 
144 
156  CathedralPlotRecord const& record, size_t row
157 );
158 
163 
168 
169 // =================================================================================================
170 // Compute Matrix Functions
171 // =================================================================================================
172 
196 template<class Record, class Accumulator>
198  CathedralPlotParameters const& parameters,
199  Record& record,
200  Accumulator accumulator = Accumulator{}
201 ) {
202  // Prepare a result matrix for the values, of the desired dimensions.
203  record.value_matrix = genesis::utils::Matrix<double>( parameters.height, parameters.width );
204  record.window_widths = std::vector<double>( parameters.height );
205 
206  // Also store the parameters in the record, for later reference to have them in one place.
207  record.parameters = parameters;
208 
209  // How far (in genome coordinates) do we advance between windows?
210  double const chr_len = static_cast<double>( record.chromosome_length );
211  auto const advance = chr_len / static_cast<double>( parameters.width );
212 
213  // Compute each cell of the result. We experimented with parallelizing this loop across threads,
214  // but the computation seems to be memory bound, and even when trying to avoid false sharing
215  // (of writing to individual cells of the matrix in each iteration), the result was never faster
216  // (and often way slower) than the single threaded code here. So let's keep it simple.
217  for( size_t row = 0; row < parameters.height; ++row ) {
218 
219  // How wide (in genome coordinates) is each window in the current row?
220  auto const window_width = cathedral_window_width( record, row );
221  assert( std::isfinite( window_width ) && window_width > 0.0 );
222  record.window_widths[row] = window_width;
223 
224  // Per row, we have a lot of overlap between the windows, up until the very last few
225  // rows where windows tend to overlap less. Using this gives massive speedup,
226  // as we only need to add entries once, and then remove them again once,
227  // instead of computing their accumulated sums over and over again.
228  // We use a deque for the entries that are in the current window,
229  // and keep track of the next index in the entry vector that needs to be enqueued.
230  std::deque<typename Record::Entry> queue;
231  size_t entry_idx = 0;
232 
233  // Start a new accumulation of values for the row.
234  accumulator.reset();
235 
236  // Assert that for one row, we accumulate and dissipate each value exactly once.
237  size_t accu_cnt = 0;
238  size_t diss_cnt = 0;
239  (void) accu_cnt;
240  (void) diss_cnt;
241 
242  // We move along the windows using the cur_gen_pos (in genome coordinates) to indicate
243  // where in the data positions we are. As we center the windows around their pixel positions,
244  // we start at half the width.
245  double cur_gen_pos = - window_width / 2.0;
246 
247  // Iterate the pixels of the columns, computing a window for each of them,
248  for( size_t col = 0; col < parameters.width; ++col ) {
249  assert( cur_gen_pos + window_width >= 0.0 );
250  assert( cur_gen_pos <= static_cast<double>( record.chromosome_length ));
251 
252  // Find the genome positions that correspond to the boundaries of the current window,
253  // limited to their possible ranges, and making sure to include the last one
254  // (can be a bit off due to rounding?! might need to check).
255  auto l_gen_pos = static_cast<size_t>( std::max( cur_gen_pos, 0.0 ));
256  auto r_gen_pos = static_cast<size_t>( std::min( cur_gen_pos + window_width, chr_len ));
257  if( col == parameters.width - 1 ) {
258  r_gen_pos = record.chromosome_length;
259  }
260  cur_gen_pos += advance;
261 
262  // Some checks that hold true if this function is called with correct data.
263  assert( l_gen_pos <= r_gen_pos );
264  assert( r_gen_pos <= record.chromosome_length );
265 
266  // Remove entries in the beginning of the queue that are not part of the window any more.
267  while( ! queue.empty() && queue.front().position < l_gen_pos ) {
268  ++diss_cnt;
269  accumulator.dissipate( queue.front() );
270  queue.pop_front();
271  }
272 
273  // Now accumulate entries that need to be added for the current window.
274  // In cases where due to rounding the windows do not overlap and leave entries
275  // between their boundaries, this here also incluse those entries in the later window.
276  // This is good, as that way, every entry is used at least once.
277  while(
278  entry_idx < record.entries.size() &&
279  record.entries[ entry_idx ].position <= r_gen_pos
280  ) {
281 
282  // Assert that the entries are in order.
283  assert( queue.empty() || queue.back().position < record.entries[entry_idx].position );
284  assert( record.entries[entry_idx].position <= record.chromosome_length );
285 
286  // Accumulate the values and add the entry to the queue.
287  ++accu_cnt;
288  accumulator.accumulate( record.entries[entry_idx] );
289  queue.push_back( record.entries[entry_idx] );
290 
291  // Move to the next entry to be enqueued.
292  ++entry_idx;
293  }
294  assert(
295  entry_idx == record.entries.size() ||
296  record.entries[entry_idx].position > r_gen_pos
297  );
298 
299  // The queue contains entries that are exactly within the window.
300  assert( queue.empty() || queue.front().position >= l_gen_pos );
301  assert( queue.empty() || queue.back().position <= r_gen_pos );
302 
303  // Now we have processed everything for this pixel, and can store the result.
304  record.value_matrix( row, col ) = accumulator.aggregate();
305  }
306 
307  // Assert that for one row, we accumulate and dissipate each value exactly once.
308  // The dissipated ones are not including the remainder of the queue in the last window,
309  // so we need to account for those here.
310  assert( record.entries.size() == accu_cnt );
311  assert( record.entries.size() == diss_cnt + queue.size() );
312  }
313 }
314 
315 // =================================================================================================
316 // Storage Functions
317 // =================================================================================================
318 
326  CathedralPlotParameters const& parameters
327 );
328 
340  CathedralPlotRecord const& record
341 );
342 
351  genesis::utils::JsonDocument const& record_document,
352  genesis::utils::Matrix<double> const& record_value_matrix,
353  std::shared_ptr<genesis::utils::BaseOutputTarget> json_target,
354  std::shared_ptr<genesis::utils::BaseOutputTarget> csv_target
355 );
356 
370  genesis::utils::JsonDocument const& record_document,
371  genesis::utils::Matrix<double> const& record_value_matrix,
372  std::string const& base_path
373 );
374 
389  CathedralPlotRecord const& record,
390  std::string const& base_path
391 );
392 
400 std::pair<genesis::utils::JsonDocument, genesis::utils::Matrix<double>>
402  std::string const& base_path
403 );
404 
412 CathedralPlotRecord load_cathedral_plot_record_from_files(
413  std::string const& base_path
414 );
415 
416 // =================================================================================================
417 // Plotting Functions
418 // =================================================================================================
419 
429  CathedralPlotRecord const& record,
430  genesis::utils::HeatmapParameters const& heatmap_parameters
431 );
432 
442  CathedralPlotRecord const& record,
443  genesis::utils::HeatmapParameters const& heatmap_parameters,
445 );
446 
456  CathedralPlotRecord const& record,
457  genesis::utils::HeatmapParameters const& heatmap_parameters
458 );
459 
460 } // namespace population
461 } // namespace genesis
462 
463 #endif // include guard
heat_map.hpp
genesis::population::cathedral_plot_parameters_to_json_document
genesis::utils::JsonDocument cathedral_plot_parameters_to_json_document(CathedralPlotParameters const &parameters)
Get a user-readable description of a CathedralPlotParameters as a JsonDocument.
Definition: cathedral_plot.cpp:173
genesis::population::CathedralPlotRecord::parameters
CathedralPlotParameters parameters
Definition: cathedral_plot.hpp:129
genesis::population::CathedralWindowWidthMethod::kExponential
@ kExponential
genesis::utils::JsonDocument
Store a Json value of any kind.
Definition: json/document.hpp:114
genesis::population::CathedralWindowWidthMethod::kLinear
@ kLinear
genesis::population::compute_cathedral_matrix
void compute_cathedral_matrix(CathedralPlotParameters const &parameters, Record &record, Accumulator accumulator=Accumulator{})
Template function to compute the value matrix for a cathedral plot, given a recored with plot paramet...
Definition: cathedral_plot.hpp:197
genesis::population::cathedral_window_width_method_from_string
CathedralWindowWidthMethod cathedral_window_width_method_from_string(std::string const &method)
Helper function to return a CathedralWindowWidthMethod from its textual representation.
Definition: cathedral_plot.cpp:152
genesis::population::make_cathedral_plot_svg
genesis::utils::SvgDocument make_cathedral_plot_svg(CathedralPlotRecord const &record, genesis::utils::HeatmapParameters const &heatmap_parameters, genesis::utils::Matrix< genesis::utils::Color > const &image)
Make a cathedral plot heat map and add it into an SVG document with legend and axes.
Definition: cathedral_plot.cpp:354
genesis::population::CathedralWindowWidthMethod::kGeometric
@ kGeometric
genesis::population::CathedralPlotRecord::~CathedralPlotRecord
virtual ~CathedralPlotRecord()=default
genesis::utils::Matrix< double >
genesis::utils::HeatmapParameters
Definition: heat_map.hpp:50
genesis::population::load_cathedral_plot_record_components_from_files
std::pair< genesis::utils::JsonDocument, genesis::utils::Matrix< double > > load_cathedral_plot_record_components_from_files(std::string const &base_path)
Load the parts of a cathedral plot from a set of files.
Definition: cathedral_plot.cpp:276
genesis::population::CathedralPlotParameters::window_width_method
CathedralWindowWidthMethod window_width_method
Definition: cathedral_plot.hpp:99
genesis::population::CathedralPlotRecord
Collection of the data used for making for a cathedral plot.
Definition: cathedral_plot.hpp:116
genesis::population::CathedralPlotRecord::title
std::string title
Definition: cathedral_plot.hpp:123
genesis::population::CathedralPlotRecord::plot_name
std::string plot_name
Definition: cathedral_plot.hpp:124
genesis::population::cathedral_window_width
double cathedral_window_width(CathedralPlotRecord const &record, size_t row)
Compute the window width for a row in a cathedral plot.
Definition: cathedral_plot.cpp:86
genesis::population::CathedralPlotRecord::value_matrix
genesis::utils::Matrix< double > value_matrix
Definition: cathedral_plot.hpp:132
genesis::population::save_cathedral_plot_record_to_targets
void save_cathedral_plot_record_to_targets(genesis::utils::JsonDocument const &record_document, genesis::utils::Matrix< double > const &record_value_matrix, std::shared_ptr< genesis::utils::BaseOutputTarget > json_target, std::shared_ptr< genesis::utils::BaseOutputTarget > csv_target)
Save the record of a cathedral plot in a set of output targets.
Definition: cathedral_plot.cpp:225
matrix.hpp
genesis::population::CathedralPlotParameters::height
size_t height
Definition: cathedral_plot.hpp:98
genesis::population::make_cathedral_plot_heatmap
genesis::utils::Matrix< genesis::utils::Color > make_cathedral_plot_heatmap(CathedralPlotRecord const &record, genesis::utils::HeatmapParameters const &heatmap_parameters)
Make a cathedral plot heat map as a color matrix.
Definition: cathedral_plot.cpp:346
genesis::population::CathedralPlotRecord::chromosome_name
std::string chromosome_name
Definition: cathedral_plot.hpp:125
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::CathedralWindowWidthMethod
CathedralWindowWidthMethod
Interpolation algorithm for window sizes across the rows of a cathedral plot.
Definition: cathedral_plot.hpp:80
color.hpp
Header of Color class.
genesis::utils::SvgDocument
Definition: svg/document.hpp:50
genesis::population::CathedralPlotRecord::chromosome_length
size_t chromosome_length
Definition: cathedral_plot.hpp:126
genesis::population::validate_cathedral_plot_record
void validate_cathedral_plot_record(CathedralPlotRecord const &record)
Check a Cathedral Plot record for internal consistency.
Definition: cathedral_plot.cpp:59
genesis::population::cathedral_window_width_method_to_string
std::string cathedral_window_width_method_to_string(CathedralWindowWidthMethod method)
Helper function to return a textual representation of the method.
Definition: cathedral_plot.cpp:136
genesis::population::save_cathedral_plot_record_to_files
void save_cathedral_plot_record_to_files(genesis::utils::JsonDocument const &record_document, genesis::utils::Matrix< double > const &record_value_matrix, std::string const &base_path)
Save the record of a cathedral plot in a set of files.
Definition: cathedral_plot.cpp:253
genesis::population::cathedral_plot_record_to_json_document
genesis::utils::JsonDocument cathedral_plot_record_to_json_document(CathedralPlotRecord const &record)
Get a user-readable description of the data of a CathedralPlotRecord as a JsonDocument.
Definition: cathedral_plot.cpp:196
genesis::population::CathedralPlotRecord::window_widths
std::vector< double > window_widths
Definition: cathedral_plot.hpp:133
genesis::population::CathedralPlotParameters::width
size_t width
Definition: cathedral_plot.hpp:97
genesis::population::CathedralPlotParameters
Plot parameters to make a cathedral plot.
Definition: cathedral_plot.hpp:94
base_output_target.hpp
genesis::population::load_cathedral_plot_record_from_files
CathedralPlotRecord load_cathedral_plot_record_from_files(std::string const &base_path)
Load the record of a cathedral plot from a set of files.
Definition: cathedral_plot.cpp:311