A toolkit for working with phylogenetic data.
v0.24.0
placement/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
47 #include "genesis/tree/tree.hpp"
48 
50 
51 #include <ostream>
52 #include <sstream>
53 #include <unordered_map>
54 
55 namespace genesis {
56 namespace placement {
57 
58 // =================================================================================================
59 // Comparision and Equality
60 // =================================================================================================
61 
62 bool compatible_trees( PlacementTree const& lhs, PlacementTree const& rhs )
63 {
64  auto node_comparator = [] (
65  PlacementTreeNode const& node_l,
66  PlacementTreeNode const& node_r
67  ) {
68  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
69  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
70  if( l_ptr == nullptr || r_ptr == nullptr ) {
71  return false;
72  }
73  return ( l_ptr->name == r_ptr->name) && ( node_l.index() == node_r.index() );
74  };
75 
76  auto edge_comparator = [] (
77  PlacementTreeEdge const& edge_l,
78  PlacementTreeEdge const& edge_r
79  ) {
80  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
81  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
82  if( l_ptr == nullptr || r_ptr == nullptr ) {
83  return false;
84  }
85  return ( l_ptr->edge_num() == r_ptr->edge_num() ) &&
86  ( edge_l.primary_node().index() == edge_r.primary_node().index() ) &&
87  ( edge_l.secondary_node().index() == edge_r.secondary_node().index() );
88  };
89 
90  return tree::equal(
91  lhs, rhs, node_comparator, edge_comparator
92  );
93 }
94 
95 bool compatible_trees( Sample const& lhs, Sample const& rhs )
96 {
97  return compatible_trees( lhs.tree(), rhs.tree() );
98 }
99 
100 // =================================================================================================
101 // Conversion
102 // =================================================================================================
103 
105 {
106  auto node_data_converter = [] ( tree::BaseNodeData const& source_node ) {
107  auto node_data = PlacementNodeData::create();
108  auto& source_data = dynamic_cast< tree::CommonNodeData const& >( source_node );
109  node_data->name = source_data.name;
110  return node_data;
111  };
112 
113  auto edge_data_converter = [] ( tree::BaseEdgeData const& source_edge ) {
114  auto edge_data = PlacementEdgeData::create();
115  auto& source_data = dynamic_cast< tree::CommonEdgeData const& >( source_edge );
116  edge_data->branch_length = source_data.branch_length;
117  return edge_data;
118  };
119 
120  auto result = tree::convert(
121  source_tree,
122  node_data_converter,
123  edge_data_converter
124  );
125 
126  // Need to set the edge nums accordingly, as those are not part of Common Tree Edge Data.
127  reset_edge_nums( result );
128  return result;
129 }
130 
132  Sample const& smp, double const sign, double const scaler, tree::MassTree& target
133 ) {
134  double pendant_work = 0.0;
135 
136  for( auto const& pqry : smp.pqueries() ) {
137  double multiplicity = total_multiplicity( pqry );
138 
139  for( auto const& place : pqry.placements() ) {
140  auto& edge = target.edge_at( place.edge().index() );
141 
142  // Use the relative position of the mass on its original branch to put it to the
143  // same position relative to its new branch.
144  double position
145  = place.proximal_length
146  / place.edge().data<PlacementEdgeData>().branch_length
147  * edge.data<tree::MassTreeEdgeData>().branch_length;
148 
149  // Add the mass at that position, normalized and using the sign.
150  edge.data<tree::MassTreeEdgeData>().masses[ position ]
151  += sign * place.like_weight_ratio * multiplicity / scaler;
152 
153  // Accumulate the work we need to do to move the masses from their pendant
154  // positions to the branches.
155  pendant_work += place.like_weight_ratio * multiplicity * place.pendant_length / scaler;
156  }
157  }
158 
159  return pendant_work;
160 }
161 
162 std::pair< tree::MassTree, double > convert_sample_to_mass_tree( Sample const& sample, bool normalize )
163 {
164  auto mass_tree = tree::convert_common_tree_to_mass_tree( sample.tree() );
165  double const scaler = normalize ? total_placement_mass_with_multiplicities( sample ) : 1.0;
166  double const pend_work = add_sample_to_mass_tree(
167  sample, +1.0, scaler, mass_tree
168  );
169  return { std::move( mass_tree ), pend_work };
170 }
171 
172 std::pair< tree::TreeSet, std::vector<double> >
174 {
175  // Build an average branch length tree for all trees in the SampleSet.
176  // This also serves as a check whether all trees in the set are compatible with each other,
177  // as average_branch_length_tree() throws if the trees have different topologies.
178  // Then, turn the resulting tree into a MassTree.
179  tree::TreeSet avg_tree_set;
180  for( auto const& smp : sample_set ) {
181  avg_tree_set.add( smp.tree() );
182  }
183  auto const mass_tree = tree::convert_common_tree_to_mass_tree(
184  tree::average_branch_length_tree( avg_tree_set )
185  );
186  avg_tree_set.clear();
187  // TODO if we introduce an avg tree calculation that accepts an iterator, we do not need
188  // TODO to create a copied tree set of all trees here.
189 
190  // Prepare mass trees for all Samples, by copying the average mass tree.
191  // This massively speeds up the calculations (at the cost of extra storage for all the trees).
192  auto mass_trees = tree::TreeSet();
193  for( size_t i = 0; i < sample_set.size(); ++i ) {
194  mass_trees.add( mass_tree, sample_set.name_at(i) );
195  }
196  assert( mass_trees.size() == sample_set.size() );
197 
198  // Also, prepare a vector to store the pendant works.
199  auto pend_works = std::vector<double>( sample_set.size(), 0.0 );
200 
201  // Add the placement mass of each Sample to its MassTree.
202  #pragma omp parallel for schedule( dynamic )
203  for( size_t i = 0; i < sample_set.size(); ++i ) {
204  // Get the total sum of placement masses for the sample...
205  double const scaler = normalize
207  : 1.0
208  ;
209 
210  // ... and use it as scaler to add the mass to the mass tree for this sample.
211  double const pend_work = add_sample_to_mass_tree(
212  sample_set[i], +1.0, scaler, mass_trees[i]
213  );
214 
215  // Also, store the pend work.
216  pend_works[ i ] = pend_work;
217  }
218 
219  return { std::move( mass_trees ), std::move( pend_works ) };
220 }
221 
222 // =================================================================================================
223 // Output
224 // =================================================================================================
225 
226 std::ostream& operator << (std::ostream& out, Sample const& smp)
227 {
228  auto table = utils::Table();
230 
231  table.add_column("#").justify(kRight);
232  table.add_column("name");
233  table.add_column("edge_num").justify(kRight);
234  table.add_column("likelihood").justify(kRight);
235  table.add_column("like_weight_ratio").justify(kRight);
236  table.add_column("proximal_length").justify(kRight);
237  table.add_column("pendant_length").justify(kRight);
238 
239  size_t i = 0;
240  for( auto const& pqry : smp.pqueries() ) {
241  std::string name = pqry.name_size() > 0 ? pqry.name_at(0).name : "";
242  if( pqry.name_size() > 1 ) {
243  name += " (+" + std::to_string( pqry.name_size() - 1 ) + ")";
244  }
245 
246  for( auto const& p : pqry.placements() ) {
247  table << std::to_string(i);
248  table << name;
249  table << std::to_string( p.edge_num() );
250  table << std::to_string( p.likelihood );
251  table << std::to_string( p.like_weight_ratio );
252  table << std::to_string( p.proximal_length );
253  table << std::to_string( p.pendant_length );
254  }
255 
256  ++i;
257  }
258 
259  out << utils::simple_layout()(table);
260  return out;
261 }
262 
263 std::string print_tree( Sample const& smp )
264 {
265  auto place_map = placements_per_edge( smp );
266 
267  auto print_line = [ &place_map ]( PlacementTreeNode const& node, PlacementTreeEdge const& edge )
268  {
269  return node.data<PlacementNodeData>().name
270  + " [" + std::to_string(
271  edge.data<PlacementEdgeData>().edge_num()
272  ) + "]" ": "
273  + std::to_string( place_map[ edge.index() ].size() ) + " placements";
274  };
275  return tree::PrinterCompact().print( smp.tree(), print_line );
276 }
277 
278 } // namespace placement
279 } // namespace genesis
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
std::vector< std::vector< PqueryPlacement const *> > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge...
TableLayout simple_layout(bool condensed)
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
Tree operator functions.
Print a Tree in a compact form, i.e., each node and edge on one line.
Definition: compact.hpp:82
Data class for MassTreeEdges. Stores the branch length and a list of masses with their positions alon...
CommonTree functions.
Provides functions for working with Placements and Pqueries.
Tree average_branch_length_tree(std::vector< Tree > const &tset)
Return a Tree where the branch lengths are the average of the Trees in the given vector of Trees or T...
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:162
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
void clear()
Clear the TreeSet and destroy all contained Trees.
Definition: tree_set.hpp:128
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:146
void reset_edge_nums(PlacementTree &tree)
Reset all edge nums of a PlacementTree.
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:246
std::pair< tree::MassTree, double > convert_sample_to_mass_tree(Sample const &sample, bool normalize)
Convert a Sample to a tree::MassTree.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
PlacementTree convert_common_tree_to_placement_tree(tree::CommonTree const &source_tree)
Convert a CommonTree into a PlacementTree.
Store a set of Samples with associated names.
Definition: sample_set.hpp:54
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
Definition: masses.cpp:65
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.hpp:238
std::string name
Name of the node.
static std::unique_ptr< PlacementEdgeData > create()
double branch_length
Branch length of the edge.
Data class for PlacementTreeNodes. Stores a node name.
void normalize(Histogram &h, double total)
Definition: operations.cpp:61
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
Common class containing the commonly needed data for tree nodes.
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
NodeDataType & data()
Definition: node.hpp:203
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
static std::unique_ptr< PlacementNodeData > create()
Header of Tree class.
size_t index() const
Return the index of this Node.
Definition: node.hpp:102
EdgeNumType edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
Common class containing the commonly needed data for tree edges.
EdgeDataType & data()
Definition: edge.hpp:217
void print(std::ostream &out, Tree const &tree, std::function< std::string(TreeNode const &node, TreeEdge const &edge)> const print_line)
Print a compact representation of a Tree to an output stream, using a given function for output of th...
Definition: compact.cpp:50
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compare two trees for equality given binary comparator functionals for their nodes and edges...
std::string print_tree(Sample const &smp)
Return a simple view of the Tree of a Sample with information about the Pqueries on it...
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.hpp:232
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
MassTree convert_common_tree_to_mass_tree(CommonTree const &source)
Helper function that takes a CommonTree (or any Tree with Node and Edge data derived from it) and tur...
std::pair< tree::TreeSet, std::vector< double > > convert_sample_set_to_mass_trees(SampleSet const &sample_set, bool normalize)
Convert all Samples in a SampleSet to tree::MassTrees.
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.