A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
placement/simulator/functions.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 
37 
39 
43 
44 #include <assert.h>
45 #include <cmath>
46 #include <ostream>
47 #include <stdexcept>
48 #include <vector>
49 
50 namespace genesis {
51 namespace placement {
52 
53 // =================================================================================================
54 // Stream Output Operators
55 // =================================================================================================
56 
57 std::ostream& operator << ( std::ostream& out, SimulatorEdgeDistribution const& distrib )
58 {
59  out << "Weight of each edge: ";
60  out << utils::join( distrib.edge_weights, ", ") << "\n";
61  return out;
62 }
63 
64 std::ostream& operator << ( std::ostream& out, SimulatorExtraPlacementDistribution const& distrib )
65 {
66  out << "Extra placement weights:\n";
67  for( size_t i = 0; i < distrib.placement_number_weights.size(); ++i ) {
68  out << i << ": " << distrib.placement_number_weights[i] << "\n";
69  }
70  out << "Path length weights:\n";
71  for( size_t i = 0; i < distrib.placement_path_length_weights.size(); ++i ) {
72  out << i << ": " << distrib.placement_path_length_weights[i] << "\n";
73  }
74  return out;
75 }
76 
77 std::ostream& operator << ( std::ostream& out, SimulatorLikeWeightRatioDistribution const& distrib )
78 {
79  if( distrib.intervals.size() != distrib.weights.size() ) {
80  throw std::logic_error( "Invalid SimulatorLikeWeightRatioDistribution." );
81  }
82 
83  out << "Like weight ratio intervals and weights:\n";
84  for( size_t i = 0; i < distrib.intervals.size(); ++i ) {
85  out << distrib.intervals[i] << ": " << distrib.weights[i] << "\n";
86  }
87  return out;
88 }
89 
90 // =================================================================================================
91 // Edge Distribution : Set Weights
92 // =================================================================================================
93 
94 // -----------------------------------------------------
95 // set_uniform_weights
96 // -----------------------------------------------------
97 
104 void set_uniform_weights( Sample const& sample, SimulatorEdgeDistribution& edge_distrib )
105 {
106  set_uniform_weights( sample.tree().edge_count(), edge_distrib );
107 }
108 
113 void set_uniform_weights( size_t edge_count, SimulatorEdgeDistribution& edge_distrib )
114 {
115  edge_distrib.edge_weights = std::vector<double>( edge_count, 1.0 );
116 }
117 
118 // -----------------------------------------------------
119 // set_random_weights
120 // -----------------------------------------------------
121 
128 void set_random_weights( Sample const& sample, SimulatorEdgeDistribution& edge_distrib )
129 {
130  set_random_weights( sample.tree().edge_count(), edge_distrib );
131 }
132 
137 void set_random_weights( size_t edge_count, SimulatorEdgeDistribution& edge_distrib )
138 {
139  edge_distrib.edge_weights = std::vector<double>( edge_count, 0.0 );
140 
141  std::uniform_real_distribution<double> distrib( 0.0, 1.0 );
142  for( size_t i = 0; i < edge_count; ++i ) {
143  edge_distrib.edge_weights[i] = distrib( utils::Options::get().random_engine() );
144  }
145 }
146 
147 // -----------------------------------------------------
148 // set_random_edges
149 // -----------------------------------------------------
150 
157 void set_random_edges( Sample const& sample, SimulatorEdgeDistribution& edge_distrib )
158 {
159  set_random_edges( sample.tree().edge_count(), edge_distrib );
160 }
161 
166 void set_random_edges( size_t edge_count, SimulatorEdgeDistribution& edge_distrib )
167 {
168  edge_distrib.edge_weights = std::vector<double>( edge_count, 0.0 );
169 
170  std::bernoulli_distribution distrib;
171  for (size_t i = 0; i < edge_count; ++i) {
172  if( distrib( utils::Options::get().random_engine() )) {
173  edge_distrib.edge_weights[i] = 1.0;
174  }
175  }
176 }
177 
178 // -----------------------------------------------------
179 // set_depths_distributed_weights
180 // -----------------------------------------------------
181 
194 {
195  auto depth_weights = closest_leaf_weight_distribution( sample );
196  set_depths_distributed_weights( sample, depth_weights, edge_distrib );
197 }
198 
217  Sample const& sample,
218  std::vector<double> const& depth_weights,
219  SimulatorEdgeDistribution& edge_distrib
220 ) {
221  // TODO Some of the code is copied from Sample::closest_leaf_weight_distribution(). Maybe
222  // it is worth putting this into a method which returns the closest leaf for edges instead
223  // of nodes.
224 
225  // Prepare weights vector.
226  size_t num_edges = sample.tree().edge_count();
227  edge_distrib.edge_weights = std::vector<double>(num_edges, 0.0);
228 
229  // Get a vector telling us the depth from each node to its closest leaf node.
230  auto depths = closest_leaf_depth_vector(sample.tree());
231 
232  // Set the weight of each edge according to its depth in the tree.
233  for (auto it = sample.tree().begin_edges(); it != sample.tree().end_edges(); ++it) {
234  // Try both nodes at the end of the edge and see which one is closer to a leaf.
235  int dp = depths[(*it)->primary_node().index()].second;
236  int ds = depths[(*it)->secondary_node().index()].second;
237  unsigned int ld = std::min(dp, ds);
238 
239  // Some safty. This holds as long as the indices are correct.
240  assert((*it)->index() < num_edges);
241 
242  // If the depth of the current edge is in the depth vector, use it.
243  // Otherwise, the tree is deeper than the given depth vector, so use zero instead,
244  // which will result in no placements being generated on this edge.
245  if (ld < depth_weights.size()) {
246  edge_distrib.edge_weights[ (*it)->index() ] = depth_weights[ld];
247  } else {
248  edge_distrib.edge_weights[ (*it)->index() ] = 0.0;
249  }
250  }
251 }
252 
253 // -----------------------------------------------------
254 // set_random_subtree_weights
255 // -----------------------------------------------------
256 
263 size_t set_random_subtree_weights( Sample const& sample, SimulatorEdgeDistribution& edge_distrib )
264 {
265  // Reset all edge weights.
266  size_t edge_count = sample.tree().edge_count();
267  edge_distrib.edge_weights = std::vector<double>( edge_count, 0.0 );
268 
269  std::uniform_int_distribution<int> edge_selection( 0, edge_count - 1 );
270  size_t edge_idx = edge_selection( utils::Options::get().random_engine() );
271 
272  // Randomly select a subtree, either starting at the end of the edge in direction of the root,
273  // or away from it.
274  PlacementTreeLink const* start_link;
275  std::bernoulli_distribution dir_distrib;
276  if( dir_distrib( utils::Options::get().random_engine() )) {
277  // Primary direction
278  start_link = &sample.tree().edge_at(edge_idx).primary_link();
279  } else {
280  // Secondary direction
281  start_link = &sample.tree().edge_at(edge_idx).secondary_link();
282  }
283 
284  for (
285  auto cur_link = &start_link->next();
286  cur_link != start_link;
287  cur_link = &cur_link->outer().next()
288  ) {
289  edge_distrib.edge_weights[ cur_link->edge().index() ] = 1.0;
290  }
291 
292  return edge_idx;
293 }
294 
295 // -----------------------------------------------------
296 // set_subtree_weights
297 // -----------------------------------------------------
298 
306  Sample const& sample,
307  size_t link_index,
308  SimulatorEdgeDistribution& edge_distrib
309 ) {
310  // Validity checks.
311  if( link_index >= sample.tree().link_count() ) {
312  throw std::runtime_error( "Invalid link index for subtree." );
313  }
314  if( sample.tree().link_at( link_index ).is_leaf() ) {
315  throw std::runtime_error( "Cannot use a leaf node as subtree." );
316  }
317 
318  // Reset all edge weights.
319  auto edge_count = sample.tree().edge_count();
320  edge_distrib.edge_weights = std::vector<double>( edge_count, 0.0 );
321 
322  // Iterate the subtree and set edge weights.
323  PlacementTreeLink const* start_link = &sample.tree().link_at( link_index );
324  for (
325  auto cur_link = &start_link->next();
326  cur_link != start_link;
327  cur_link = &cur_link->outer().next()
328  ) {
329  edge_distrib.edge_weights[ cur_link->edge().index() ] = 1.0;
330  }
331 }
332 
333 // -----------------------------------------------------
334 // learn_per_edge_weights
335 // -----------------------------------------------------
336 
348 void learn_per_edge_weights( Sample const& sample, SimulatorEdgeDistribution& edge_distrib )
349 {
350  edge_distrib.edge_weights = placement_weight_per_edge( sample );
351 }
352 
353 // =================================================================================================
354 // Placement Number Distribution
355 // =================================================================================================
356 
358  Sample const& sample,
360 ) {
361  auto weights = std::vector<double>();
362  for( auto const& pquery : sample.pqueries() ) {
363  size_t extra_placements = pquery.placement_size() - 1;
364  if( weights.size() < extra_placements + 1 ) {
365  weights.resize( extra_placements + 1 );
366  }
367  ++weights[ extra_placements ];
368  }
369  p_distib.placement_number_weights = weights;
370 }
371 
373  Sample const& sample,
375 ) {
376  // The distance (path length) between two placements is the number of nodes between them.
377  // Get a matrix that gives us this number for each pair of edges of the tree.
378  auto edge_dist_matrix = tree::edge_path_length_matrix( sample.tree() );
379 
380  // Iterate all pqueries and collect the distances between all of their placements, where
381  // the distance is the number of nodes between them.
382  auto weights = std::vector<double>();
383  for( auto const& pquery : sample.pqueries() ) {
384  for( auto const& place_from : pquery.placements() ) {
385  for( auto const& place_to : pquery.placements() ) {
386  size_t dist = edge_dist_matrix(
387  place_from.edge().index(), place_to.edge().index()
388  );
389 
390  // The following assertion holds as long as our dist matrix is correct.
391  // If assertions are disabled (release), the whole condition should be eliminated
392  // from the binary, so there is no speed penalty.
393  if( place_from.edge().index() == place_to.edge().index() ) {
394  assert( dist == 0 );
395  }
396 
397  // We don't need paths of length 0 currently. They will be eliminated in the
398  // distrubution class anyway, when calling prepare().
399  if( dist == 0 ) {
400  continue;
401  }
402  if( weights.size() < dist + 1 ) {
403  weights.resize( dist + 1 );
404  }
405  weights[ dist ] += 1.0;
406  }
407  }
408  }
409  p_distib.placement_path_length_weights = weights;
410 }
411 
412 // =================================================================================================
413 // Like Weight Ratio Distribution
414 // =================================================================================================
415 
417  Sample const& sample,
419  size_t number_of_intervals
420 ) {
421  // Init the result vectors (we need one more element than number of intervals).
422  auto intervals = std::vector<double>( number_of_intervals + 1, 0.0 );
423  auto weights = std::vector<double>( number_of_intervals + 1, 0.0 );
424  for( size_t i = 0; i < number_of_intervals + 1; ++i ) {
425  intervals[i] = static_cast<double>( i ) / static_cast<double>( number_of_intervals );
426  }
427 
428  // Iterate all placements and use their rounded lwr value to increase the weights of the
429  // distribution.
430  for( auto const& pquery : sample.pqueries() ) {
431  for( auto const& placement : pquery.placements() ) {
432 
433  double rounded_lwr = std::round( placement.like_weight_ratio * number_of_intervals )
434  / number_of_intervals;
435 
436  if( 0.0 > rounded_lwr || rounded_lwr > 1.0 ) {
437  throw std::runtime_error( "Invalid like_weight_ratio in Sample." );
438  }
439  size_t pos = static_cast<size_t>( rounded_lwr * number_of_intervals );
440  assert( pos < weights.size() );
441  weights[ pos ] += 1.0;
442  }
443  }
444 
445  // Set the result.
446  lwr_distib.intervals = intervals;
447  lwr_distib.weights = weights;
448 }
449 
450 } // namespace placement
451 } // namespace genesis
void set_depths_distributed_weights(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Set the weights of an SimulatorEdgeDistribution so that they follow the depth distribution of the edg...
IteratorEdges begin_edges()
Definition: tree/tree.cpp:498
void set_random_edges(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Set the weights of a SimulatorEdgeDistribution randomly to either 0.0 or 1.0, so that a random subset...
IteratorEdges end_edges()
Definition: tree/tree.cpp:503
std::vector< double > closest_leaf_weight_distribution(Sample const &sample)
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.cpp:69
size_t set_random_subtree_weights(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Sets the weights of an SimulatorEdgeDistribution to 1.0 for a randomly chosen subtree, all others to 0.0.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
std::vector< double > placement_weight_per_edge(Sample const &sample)
Return a vector that contains the sum of the weights of the PqueryPlacements per edge of the tree of ...
std::string join(T const &v, std::string const &delimiter)
Return a string where the elements of a container v are joined using the string delimiter in between ...
Definition: string.hpp:318
Provides functions for working with Placements and Pqueries.
void set_uniform_weights(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Sets the weights of an SimulatorEdgeDistribution to 1.0 for all edges, so that each edge has the same...
Provides some valuable additions to STD.
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.cpp:284
utils::Matrix< size_t > edge_path_length_matrix(Tree const &tree)
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
void learn_placement_path_length_weights(Sample const &sample, SimulatorExtraPlacementDistribution &p_distib)
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.cpp:53
void learn_like_weight_ratio_distribution(Sample const &sample, SimulatorLikeWeightRatioDistribution &lwr_distib, size_t number_of_intervals)
void set_random_weights(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Set the weights of an SimulatorEdgeDistribution for the edges randomly to a value between 0...
void learn_per_edge_weights(Sample const &sample, SimulatorEdgeDistribution &edge_distrib)
Sets the weights of an SimulatorEdgeDistributionso that they follow the same distribution of placemen...
Provides some commonly used string utility functions.
void learn_placement_number_weights(Sample const &sample, SimulatorExtraPlacementDistribution &p_distib)
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.cpp:358
Generate a certain number of additional PqueryPlacements around a given PlacementTreeEdge.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
std::vector< std::pair< TreeNode const *, size_t > > closest_leaf_depth_vector(const Tree &tree)
Returns a vector containing the closest leaf node for each node, measured in number of edges between ...
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
Header of Tree distance methods.
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
static Options & get()
Returns a single instance of this class.
Definition: options.hpp:59
size_t link_count() const
Return the number of TreeLinks of the Tree.
Definition: tree/tree.cpp:342
void set_subtree_weights(Sample const &sample, size_t link_index, SimulatorEdgeDistribution &edge_distrib)
Set the weights of a subtree to 1.0 and all other weights to 0.0.