A library for working with phylogenetic and population genetic data.
v0.32.0
placement/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2021 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
38 
47 #include "genesis/tree/tree.hpp"
48 
50 
51 #include <cmath>
52 #include <ostream>
53 #include <sstream>
54 #include <unordered_map>
55 
56 namespace genesis {
57 namespace placement {
58 
59 // =================================================================================================
60 // Comparision and Equality
61 // =================================================================================================
62 
63 bool compatible_trees( PlacementTree const& lhs, PlacementTree const& rhs )
64 {
65  auto node_comparator = [] (
66  PlacementTreeNode const& node_l,
67  PlacementTreeNode const& node_r
68  ) {
69  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
70  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
71  if( l_ptr == nullptr || r_ptr == nullptr ) {
72  return false;
73  }
74  return ( l_ptr->name == r_ptr->name) && ( node_l.index() == node_r.index() );
75  };
76 
77  auto edge_comparator = [] (
78  PlacementTreeEdge const& edge_l,
79  PlacementTreeEdge const& edge_r
80  ) {
81  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
82  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
83  if( l_ptr == nullptr || r_ptr == nullptr ) {
84  return false;
85  }
86  return ( l_ptr->edge_num() == r_ptr->edge_num() ) &&
87  ( edge_l.primary_node().index() == edge_r.primary_node().index() ) &&
88  ( edge_l.secondary_node().index() == edge_r.secondary_node().index() );
89  };
90 
91  return tree::equal(
92  lhs, rhs, node_comparator, edge_comparator
93  );
94 }
95 
96 bool compatible_trees( Sample const& lhs, Sample const& rhs )
97 {
98  return compatible_trees( lhs.tree(), rhs.tree() );
99 }
100 
101 // =================================================================================================
102 // Conversion
103 // =================================================================================================
104 
106 {
107  auto node_data_converter = [] ( tree::BaseNodeData const& source_node ) {
108  auto node_data = PlacementNodeData::create();
109  auto& source_data = dynamic_cast< tree::CommonNodeData const& >( source_node );
110  node_data->name = source_data.name;
111  return node_data;
112  };
113 
114  auto edge_data_converter = [] ( tree::BaseEdgeData const& source_edge ) {
115  auto edge_data = PlacementEdgeData::create();
116  auto& source_data = dynamic_cast< tree::CommonEdgeData const& >( source_edge );
117  edge_data->branch_length = source_data.branch_length;
118  return edge_data;
119  };
120 
121  auto result = tree::convert(
122  source_tree,
123  node_data_converter,
124  edge_data_converter
125  );
126 
127  // Need to set the edge nums accordingly, as those are not part of Common Tree Edge Data.
128  reset_edge_nums( result );
129  return result;
130 }
131 
133  Sample const& smp, double const sign, double const scaler, tree::MassTree& target
134 ) {
135  double pendant_work = 0.0;
136 
137  for( auto const& pqry : smp.pqueries() ) {
138  double multiplicity = total_multiplicity( pqry );
139 
140  for( auto const& place : pqry.placements() ) {
141  auto& edge = target.edge_at( place.edge().index() );
142 
143  // Use the relative position of the mass on its original branch to put it to the
144  // same position relative to its new branch.
145  // If any of the branches has length zero, we'd get a nan position, so fix this.
146  double position
147  = place.proximal_length
148  / place.edge().data<PlacementEdgeData>().branch_length
150  ;
151  if(
152  ! std::isfinite( place.edge().data<PlacementEdgeData>().branch_length ) ||
153  ! std::isfinite( edge.data<tree::MassTreeEdgeData>().branch_length ) ||
154  place.edge().data<PlacementEdgeData>().branch_length == 0.0 ||
155  edge.data<tree::MassTreeEdgeData>().branch_length == 0.0
156  ) {
157  position = 0.0;
158  }
159 
160  // Add the mass at that position, normalized and using the sign.
161  edge.data<tree::MassTreeEdgeData>().masses[ position ]
162  += sign * place.like_weight_ratio * multiplicity / scaler
163  ;
164 
165  // Accumulate the work we need to do to move the masses from their pendant
166  // positions to the branches.
167  pendant_work += place.like_weight_ratio * multiplicity * place.pendant_length / scaler;
168  }
169  }
170 
171  return pendant_work;
172 }
173 
174 std::pair< tree::MassTree, double > convert_sample_to_mass_tree( Sample const& sample, bool normalize )
175 {
176  auto mass_tree = tree::convert_common_tree_to_mass_tree( sample.tree() );
177  double const scaler = normalize ? total_placement_mass_with_multiplicities( sample ) : 1.0;
178  double const pend_work = add_sample_to_mass_tree(
179  sample, +1.0, scaler, mass_tree
180  );
181  return { std::move( mass_tree ), pend_work };
182 }
183 
184 std::pair< tree::TreeSet, std::vector<double> >
186 {
187  // Build an average branch length tree for all trees in the SampleSet.
188  // This also serves as a check whether all trees in the set are compatible with each other,
189  // as average_branch_length_tree() throws if the trees have different topologies.
190  // Then, turn the resulting tree into a MassTree.
191  tree::TreeSet avg_tree_set;
192  for( auto const& smp : sample_set ) {
193  avg_tree_set.add( smp.tree() );
194  }
195  auto const mass_tree = tree::convert_common_tree_to_mass_tree(
196  tree::average_branch_length_tree( avg_tree_set )
197  );
198  avg_tree_set.clear();
199  // TODO if we introduce an avg tree calculation that accepts an iterator, we do not need
200  // TODO to create a copied tree set of all trees here.
201 
202  // Prepare mass trees for all Samples, by copying the average mass tree.
203  // This massively speeds up the calculations (at the cost of extra storage for all the trees).
204  auto mass_trees = tree::TreeSet();
205  for( size_t i = 0; i < sample_set.size(); ++i ) {
206  mass_trees.add( mass_tree, sample_set.name_at(i) );
207  }
208  assert( mass_trees.size() == sample_set.size() );
209 
210  // Also, prepare a vector to store the pendant works.
211  auto pend_works = std::vector<double>( sample_set.size(), 0.0 );
212 
213  // Add the placement mass of each Sample to its MassTree.
214  #pragma omp parallel for schedule( dynamic )
215  for( size_t i = 0; i < sample_set.size(); ++i ) {
216  // Get the total sum of placement masses for the sample...
217  double const scaler = normalize
219  : 1.0
220  ;
221 
222  // ... and use it as scaler to add the mass to the mass tree for this sample.
223  double const pend_work = add_sample_to_mass_tree(
224  sample_set[i], +1.0, scaler, mass_trees[i]
225  );
226 
227  // Also, store the pend work.
228  pend_works[ i ] = pend_work;
229  }
230 
231  return { std::move( mass_trees ), std::move( pend_works ) };
232 }
233 
234 // =================================================================================================
235 // Output
236 // =================================================================================================
237 
238 std::ostream& operator << (std::ostream& out, Sample const& smp)
239 {
240  auto table = utils::Table();
242 
243  table.add_column("#").justify(kRight);
244  table.add_column("name");
245  table.add_column("edge_num").justify(kRight);
246  table.add_column("likelihood").justify(kRight);
247  table.add_column("like_weight_ratio").justify(kRight);
248  table.add_column("proximal_length").justify(kRight);
249  table.add_column("pendant_length").justify(kRight);
250 
251  size_t i = 0;
252  for( auto const& pqry : smp.pqueries() ) {
253  std::string name = pqry.name_size() > 0 ? pqry.name_at(0).name : "";
254  if( pqry.name_size() > 1 ) {
255  name += " (+" + std::to_string( pqry.name_size() - 1 ) + ")";
256  }
257 
258  for( auto const& p : pqry.placements() ) {
259  table << std::to_string(i);
260  table << name;
261  table << std::to_string( p.edge_num() );
262  table << std::to_string( p.likelihood );
263  table << std::to_string( p.like_weight_ratio );
264  table << std::to_string( p.proximal_length );
265  table << std::to_string( p.pendant_length );
266  }
267 
268  ++i;
269  }
270 
271  out << utils::simple_layout()(table);
272  return out;
273 }
274 
275 std::string print_tree( Sample const& smp )
276 {
277  auto place_map = placements_per_edge( smp );
278 
279  auto print_line = [ &place_map ]( PlacementTreeNode const& node, PlacementTreeEdge const& edge )
280  {
281  return node.data<PlacementNodeData>().name
282  + " [" + std::to_string(
283  edge.data<PlacementEdgeData>().edge_num()
284  ) + "]" ": "
285  + std::to_string( place_map[ edge.index() ].size() ) + " placements";
286  };
287  return tree::PrinterCompact().print( smp.tree(), print_line );
288 }
289 
290 } // namespace placement
291 } // namespace genesis
table.hpp
genesis::tree::TreeSet::add
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
genesis::placement::total_placement_mass_with_multiplicities
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
genesis::placement::Sample::tree
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:124
genesis::tree::convert
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
Definition: tree/function/operators.cpp:53
functions.hpp
CommonTree functions.
genesis::placement::SampleSet
Store a set of Samples with associated names.
Definition: sample_set.hpp:54
genesis::placement::PlacementEdgeData::edge_num
EdgeNumType edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.
Definition: placement_tree.hpp:194
genesis::placement::convert_sample_to_mass_tree
std::pair< tree::MassTree, double > convert_sample_to_mass_tree(Sample const &sample, bool normalize)
Convert a Sample to a tree::MassTree.
Definition: placement/function/operators.cpp:174
placement_tree.hpp
genesis::tree::PrinterCompact
Print a Tree in a compact form, i.e., each node and edge on one line.
Definition: compact.hpp:82
genesis::tree::MassTreeEdgeData
Data class for MassTreeEdges. Stores the branch length and a list of masses with their positions alon...
Definition: tree/mass_tree/tree.hpp:148
genesis::placement::Sample
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on.
Definition: sample.hpp:68
genesis::utils::Table
Definition: utils/text/table.hpp:60
tree.hpp
Header of Tree class.
genesis::tree::BaseEdgeData
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
tree_set.hpp
tree_set.hpp
genesis::tree::Tree::edge_at
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.hpp:238
genesis::placement::PlacementEdgeData::create
static std::unique_ptr< PlacementEdgeData > create()
Definition: placement_tree.hpp:172
genesis::tree::TreeSet::clear
void clear()
Clear the TreeSet and destroy all contained Trees.
Definition: tree_set.hpp:128
functions.hpp
Provides functions for working with Placements and Pqueries.
genesis::tree::equal
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compare two trees for equality given binary comparator functionals for their nodes and edges.
Definition: tree/function/operators.cpp:80
genesis::tree::TreeEdge::secondary_node
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:162
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::placement::add_sample_to_mass_tree
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
Definition: placement/function/operators.cpp:132
genesis::tree::TreeSet
Definition: tree_set.hpp:48
genesis::placement::PlacementEdgeData
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num,...
Definition: placement_tree.hpp:139
tree.hpp
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
compact.hpp
genesis::placement::PlacementNodeData
Data class for PlacementTreeNodes. Stores a node name.
Definition: placement_tree.hpp:87
genesis::tree::CommonEdgeData::branch_length
double branch_length
Branch length of the edge.
Definition: tree/common_tree/tree.hpp:193
masses.hpp
genesis::utils::Table::Column::Justification::kRight
@ kRight
genesis::tree::CommonNodeData::name
std::string name
Name of the node.
Definition: tree/common_tree/tree.hpp:127
genesis::tree::TreeEdge
Definition: edge.hpp:60
genesis::tree::TreeNode
Definition: tree/tree/node.hpp:58
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::BaseNodeData
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
genesis::tree::TreeEdge::primary_node
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:146
operators.hpp
Tree operator functions.
tree.hpp
genesis::tree::TreeEdge::data_ptr
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:246
operators.hpp
genesis::utils::simple_layout
TableLayout simple_layout(bool condensed)
Definition: utils/text/table.cpp:445
genesis::tree::convert_common_tree_to_mass_tree
MassTree convert_common_tree_to_mass_tree(CommonTree const &source)
Helper function that takes a CommonTree (or any Tree with Node and Edge data derived from it) and tur...
Definition: tree/mass_tree/tree.hpp:213
functions.hpp
genesis::placement::convert_sample_set_to_mass_trees
std::pair< tree::TreeSet, std::vector< double > > convert_sample_set_to_mass_trees(SampleSet const &sample_set, bool normalize)
Convert all Samples in a SampleSet to tree::MassTrees.
Definition: placement/function/operators.cpp:185
genesis::tree::CommonEdgeData
Common class containing the commonly needed data for tree edges.
Definition: tree/common_tree/tree.hpp:144
genesis::placement::reset_edge_nums
void reset_edge_nums(PlacementTree &tree)
Reset all edge nums of a PlacementTree.
Definition: placement/function/helper.cpp:285
genesis::placement::print_tree
std::string print_tree(Sample const &smp)
Return a simple view of the Tree of a Sample with information about the Pqueries on it.
Definition: placement/function/operators.cpp:275
genesis::tree::average_branch_length_tree
Tree average_branch_length_tree(std::vector< Tree > const &tset)
Return a Tree where the branch lengths are the average of the Trees in the given vector of Trees or T...
Definition: tree/common_tree/functions.cpp:242
genesis::placement::convert_common_tree_to_placement_tree
PlacementTree convert_common_tree_to_placement_tree(tree::CommonTree const &source_tree)
Convert a CommonTree into a PlacementTree.
Definition: placement/function/operators.cpp:105
genesis::tree::CommonNodeData
Common class containing the commonly needed data for tree nodes.
Definition: tree/common_tree/tree.hpp:79
genesis::placement::placements_per_edge
std::vector< std::vector< PqueryPlacement const * > > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge.
Definition: placement/function/helper.cpp:106
genesis::tree::TreeNode::index
size_t index() const
Return the index of this Node.
Definition: tree/tree/node.hpp:102
sample.hpp
genesis::tree::TreeNode::data
NodeDataType & data()
Definition: tree/tree/node.hpp:203
genesis::tree::TreeEdge::data
EdgeDataType & data()
Definition: edge.hpp:217
genesis::tree::TreeNode::data_ptr
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: tree/tree/node.hpp:232
genesis::tree::PrinterCompact::print
void print(std::ostream &out, Tree const &tree, std::function< std::string(TreeNode const &node, TreeEdge const &edge)> const print_line)
Print a compact representation of a Tree to an output stream, using a given function for output of th...
Definition: compact.cpp:50
genesis::placement::Sample::pqueries
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:264
genesis::utils::normalize
void normalize(Histogram &h, double total)
Definition: operations.cpp:61
genesis::placement::total_multiplicity
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
Definition: masses.cpp:65
genesis::placement::operator<<
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
Definition: placement/function/operators.cpp:238
genesis::placement::compatible_trees
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
Definition: placement/function/operators.cpp:63
helper.hpp
genesis::placement::PlacementNodeData::create
static std::unique_ptr< PlacementNodeData > create()
Definition: placement_tree.hpp:111