A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
placement/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
47 #include "genesis/tree/tree.hpp"
48 
50 
51 #include <ostream>
52 #include <sstream>
53 #include <unordered_map>
54 
55 namespace genesis {
56 namespace placement {
57 
58 // =================================================================================================
59 // Comparision and Equality
60 // =================================================================================================
61 
62 bool compatible_trees( PlacementTree const& lhs, PlacementTree const& rhs )
63 {
64  auto node_comparator = [] (
65  PlacementTreeNode const& node_l,
66  PlacementTreeNode const& node_r
67  ) {
68  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
69  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
70  if( l_ptr == nullptr || r_ptr == nullptr ) {
71  return false;
72  }
73  return l_ptr->name == r_ptr->name &&
74  node_l.index() == node_r.index();
75  };
76 
77  auto edge_comparator = [] (
78  PlacementTreeEdge const& edge_l,
79  PlacementTreeEdge const& edge_r
80  ) {
81  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
82  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
83  if( l_ptr == nullptr || r_ptr == nullptr ) {
84  return false;
85  }
86  return l_ptr->edge_num() == r_ptr->edge_num() &&
87  edge_l.primary_node().index() == edge_r.primary_node().index() &&
88  edge_l.secondary_node().index() == edge_r.secondary_node().index();
89  };
90 
91  return tree::equal(
92  lhs, rhs, node_comparator, edge_comparator
93  );
94 }
95 
96 bool compatible_trees( Sample const& lhs, Sample const& rhs )
97 {
98  return compatible_trees( lhs.tree(), rhs.tree() );
99 }
100 
101 // =================================================================================================
102 // Conversion
103 // =================================================================================================
104 
106 {
107  auto node_data_converter = [] ( tree::BaseNodeData const& source_node ) {
108  auto node_data = PlacementNodeData::create();
109  auto& source_data = dynamic_cast< tree::DefaultNodeData const& >( source_node );
110  node_data->name = source_data.name;
111  return node_data;
112  };
113 
114  auto edge_data_converter = [] ( tree::BaseEdgeData const& source_edge ) {
115  auto edge_data = PlacementEdgeData::create();
116  auto& source_data = dynamic_cast< tree::DefaultEdgeData const& >( source_edge );
117  edge_data->branch_length = source_data.branch_length;
118  return edge_data;
119  };
120 
121  auto result = tree::convert(
122  source_tree,
123  node_data_converter,
124  edge_data_converter
125  );
126 
127  // Need to set the edge nums accordingly, as those are not part of Default Tree Edge Data.
128  reset_edge_nums( result );
129  return result;
130 }
131 
133  Sample const& smp, double const sign, double const scaler, tree::MassTree& target
134 ) {
135  double pendant_work = 0.0;
136 
137  for( auto const& pqry : smp.pqueries() ) {
138  double multiplicity = total_multiplicity( pqry );
139 
140  for( auto const& place : pqry.placements() ) {
141  auto& edge = target.edge_at( place.edge().index() );
142 
143  // Use the relative position of the mass on its original branch to put it to the
144  // same position relative to its new branch.
145  double position
146  = place.proximal_length
147  / place.edge().data<PlacementEdgeData>().branch_length
148  * edge.data<tree::MassTreeEdgeData>().branch_length;
149 
150  // Add the mass at that position, normalized and using the sign.
151  edge.data<tree::MassTreeEdgeData>().masses[ position ]
152  += sign * place.like_weight_ratio * multiplicity / scaler;
153 
154  // Accumulate the work we need to do to move the masses from their pendant
155  // positions to the branches.
156  pendant_work += place.like_weight_ratio * multiplicity * place.pendant_length / scaler;
157  }
158  }
159 
160  return pendant_work;
161 }
162 
163 std::pair< tree::MassTree, double > convert_sample_to_mass_tree( Sample const& sample )
164 {
165  auto mass_tree = tree::convert_default_tree_to_mass_tree( sample.tree() );
166  double const total_mass = total_placement_mass_with_multiplicities( sample );
167  double const pend_work = add_sample_to_mass_tree(
168  sample, +1.0, total_mass, mass_tree
169  );
170  return { std::move( mass_tree ), pend_work };
171 }
172 
173 std::pair<
174  std::vector<tree::MassTree>,
175  std::vector<double>
177 {
178  // Build an average branch length tree for all trees in the SampleSet.
179  // This also serves as a check whether all trees in the set are compatible with each other,
180  // as average_branch_length_tree() throws if the trees have different topologies.
181  // Then, turn the resulting tree into a MassTree.
182  tree::TreeSet avg_tree_set;
183  for( auto const& smp : sample_set ) {
184  avg_tree_set.add( "", smp.sample.tree() );
185  }
186  auto const mass_tree = tree::convert_default_tree_to_mass_tree(
187  tree::average_branch_length_tree( avg_tree_set )
188  );
189  avg_tree_set.clear();
190  // TODO if we introduce an avg tree calculation that accepts an iterator, we do not need
191  // TODO to create a copied tree set of all trees here.
192 
193  // Prepare mass trees for all Samples, by copying the average mass tree.
194  // This massively speeds up the calculations (at the cost of extra storage for all the trees).
195  auto mass_trees = std::vector<tree::MassTree>( sample_set.size(), mass_tree );
196 
197  // Also, prepare a vector to store the pendant works.
198  auto pend_works = std::vector<double>( sample_set.size(), 0.0 );
199 
200  // Add the placement mass of each Sample to its MassTree.
201  #pragma omp parallel for schedule( dynamic )
202  for( size_t i = 0; i < sample_set.size(); ++i ) {
203  // Get the total sum of placement masses for the sample...
204  double const total_mass = total_placement_mass_with_multiplicities( sample_set[i].sample );
205 
206  // ... and use it as scaler to add the mass to the mass tree for this sample.
207  double const pend_work = add_sample_to_mass_tree(
208  sample_set[i].sample, +1.0, total_mass, mass_trees[i]
209  );
210 
211  // Also, store the pend work.
212  pend_works[ i ] = pend_work;
213  }
214 
215  return { std::move( mass_trees ), std::move( pend_works ) };
216 }
217 
218 // =================================================================================================
219 // Output
220 // =================================================================================================
221 
222 std::ostream& operator << (std::ostream& out, Sample const& smp)
223 {
224  auto table = utils::Table();
226 
227  table.add_column("#").justify(kRight);
228  table.add_column("name");
229  table.add_column("edge_num").justify(kRight);
230  table.add_column("likelihood").justify(kRight);
231  table.add_column("like_weight_ratio").justify(kRight);
232  table.add_column("proximal_length").justify(kRight);
233  table.add_column("pendant_length").justify(kRight);
234 
235  size_t i = 0;
236  for( auto const& pqry : smp.pqueries() ) {
237  std::string name = pqry.name_size() > 0 ? pqry.name_at(0).name : "";
238  if( pqry.name_size() > 1 ) {
239  name += " (+" + std::to_string( pqry.name_size() - 1 ) + ")";
240  }
241 
242  for( auto const& p : pqry.placements() ) {
243  table << std::to_string(i);
244  table << name;
245  table << std::to_string( p.edge_num() );
246  table << std::to_string( p.likelihood );
247  table << std::to_string( p.like_weight_ratio );
248  table << std::to_string( p.proximal_length );
249  table << std::to_string( p.pendant_length );
250  }
251 
252  ++i;
253  }
254 
255  out << utils::simple_layout()(table);
256  return out;
257 }
258 
259 std::string print_tree( Sample const& smp )
260 {
261  auto place_map = placements_per_edge( smp );
262 
263  auto print_line = [ &place_map ]( PlacementTreeNode const& node, PlacementTreeEdge const& edge )
264  {
265  return node.data<PlacementNodeData>().name
266  + " [" + std::to_string(
267  edge.data<PlacementEdgeData>().edge_num()
268  ) + "]" ": "
269  + std::to_string( place_map[ edge.index() ].size() ) + " placements";
270  };
271  return tree::PrinterCompact().print( smp.tree(), print_line );
272 }
273 
274 } // namespace placement
275 } // namespace genesis
std::vector< std::vector< PqueryPlacement const * > > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge...
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
TableLayout simple_layout(bool condensed)
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
Tree operator functions.
Print a Tree in a compact form, i.e., each node and edge on one line.
Definition: compact.hpp:82
Data class for MassTreeEdges. Stores the branch length and a list of masses with their positions alon...
Default class containing the commonly needed data for tree nodes.
Provides functions for working with Placements and Pqueries.
PlacementTree convert_default_tree_to_placement_tree(tree::DefaultTree const &source_tree)
Convert a DefaultTree into a PlacementTree.
std::string print_tree(Sample const &smp)
Return a simple view of the Tree of a Sample with information about the Pqueries on it...
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:161
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:381
void clear()
Clear the TreeSet and destroy all contained Trees.
Definition: tree_set.cpp:79
size_t index() const
Return the index of this Node.
Definition: node.hpp:100
std::pair< std::vector< tree::MassTree >, std::vector< double >> convert_sample_set_to_mass_trees(SampleSet const &sample_set)
Convert all Samples in a SampleSet to tree::MassTrees.
Default class containing the commonly needed data for tree edges.
MassTree convert_default_tree_to_mass_tree(DefaultTree const &source)
Helper function that takes a DefaultTree (or any Tree with Node and Edge data derived from it) and tu...
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
Definition: masses.cpp:65
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:145
void reset_edge_nums(PlacementTree &tree)
Reset all edge nums of a PlacementTree.
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:212
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
static std::unique_ptr< PlacementEdgeData > create()
Data class for PlacementTreeNodes. Stores a node name.
double branch_length
Branch length of the edge.
Default Tree functions.
Tree average_branch_length_tree(TreeSet const &tset)
Return a Tree where the branch lengths are the average of the Trees in the TreeSet, given that they all have the same topology.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
std::string name
Name of the node.
NodeDataType & data()
Definition: node.hpp:152
std::pair< tree::MassTree, double > convert_sample_to_mass_tree(Sample const &sample)
Convert a Sample to a tree::MassTree.
static std::unique_ptr< PlacementNodeData > create()
Header of Tree class.
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
EdgeDataType & data()
Definition: edge.hpp:183
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
void print(std::ostream &out, Tree const &tree, std::function< std::string(TreeNode const &node, TreeEdge const &edge)> const print_line)
Print a compact representation of a Tree to an output stream, using a given function for output of th...
Definition: compact.cpp:50
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.hpp:181
int edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.