A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
placement/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
44 #include "genesis/tree/tree.hpp"
46 
48 
49 #include <ostream>
50 #include <sstream>
51 #include <unordered_map>
52 
53 namespace genesis {
54 namespace placement {
55 
56 // =================================================================================================
57 // Comparision and Equality
58 // =================================================================================================
59 
60 bool compatible_trees( PlacementTree const& lhs, PlacementTree const& rhs )
61 {
62  auto node_comparator = [] (
63  PlacementTreeNode const& node_l,
64  PlacementTreeNode const& node_r
65  ) {
66  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
67  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
68  if( l_ptr == nullptr || r_ptr == nullptr ) {
69  return false;
70  }
71  return l_ptr->name == r_ptr->name &&
72  node_l.index() == node_r.index();
73  };
74 
75  auto edge_comparator = [] (
76  PlacementTreeEdge const& edge_l,
77  PlacementTreeEdge const& edge_r
78  ) {
79  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
80  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
81  if( l_ptr == nullptr || r_ptr == nullptr ) {
82  return false;
83  }
84  return l_ptr->edge_num() == r_ptr->edge_num() &&
85  edge_l.primary_node().index() == edge_r.primary_node().index() &&
86  edge_l.secondary_node().index() == edge_r.secondary_node().index();
87  };
88 
89  return tree::equal(
90  lhs, rhs, node_comparator, edge_comparator
91  );
92 }
93 
94 bool compatible_trees( Sample const& lhs, Sample const& rhs )
95 {
96  return compatible_trees( lhs.tree(), rhs.tree() );
97 }
98 
99 // =================================================================================================
100 // Conversion
101 // =================================================================================================
102 
104 {
105  auto node_data_converter = [] ( tree::BaseNodeData const& source_node ) {
106  auto node_data = PlacementNodeData::create();
107  auto& source_data = dynamic_cast< tree::DefaultNodeData const& >( source_node );
108  node_data->name = source_data.name;
109  return node_data;
110  };
111 
112  auto edge_data_converter = [] ( tree::BaseEdgeData const& source_edge ) {
113  auto edge_data = PlacementEdgeData::create();
114  auto& source_data = dynamic_cast< tree::DefaultEdgeData const& >( source_edge );
115  edge_data->branch_length = source_data.branch_length;
116  return edge_data;
117  };
118 
119  auto result = tree::convert(
120  source_tree,
121  node_data_converter,
122  edge_data_converter
123  );
124 
125  // Need to set the edge nums accordingly, as those are not part of Default Tree Edge Data.
126  reset_edge_nums( result );
127  return result;
128 }
129 
131  Sample const& smp, double const sign, double const scaler, tree::MassTree& target
132 ) {
133  double pendant_work = 0.0;
134 
135  for( auto const& pqry : smp.pqueries() ) {
136  double multiplicity = total_multiplicity( pqry );
137 
138  for( auto const& place : pqry.placements() ) {
139  auto& edge = target.edge_at( place.edge().index() );
140 
141  // Use the relative position of the mass on its original branch to put it to the
142  // same position relative to its new branch.
143  double position
144  = place.proximal_length
145  / place.edge().data<PlacementEdgeData>().branch_length
146  * edge.data<tree::MassTreeEdgeData>().branch_length;
147 
148  // Add the mass at that position, normalized and using the sign.
149  edge.data<tree::MassTreeEdgeData>().masses[ position ]
150  += sign * place.like_weight_ratio * multiplicity / scaler;
151 
152  // Accumulate the work we need to do to move the masses from their pendant
153  // positions to the branches.
154  pendant_work += place.like_weight_ratio * multiplicity * place.pendant_length / scaler;
155  }
156  }
157 
158  return pendant_work;
159 }
160 
161 std::pair< tree::MassTree, double > convert_sample_to_mass_tree( Sample const& sample )
162 {
163  auto mass_tree = tree::convert_default_tree_to_mass_tree( sample.tree() );
164  double const total_mass = total_placement_mass_with_multiplicities( sample );
165  double const pend_work = add_sample_to_mass_tree(
166  sample, +1.0, total_mass, mass_tree
167  );
168  return { std::move( mass_tree ), pend_work };
169 }
170 
171 std::pair<
172  std::vector<tree::MassTree>,
173  std::vector<double>
175 {
176  // Build an average branch length tree for all trees in the SampleSet.
177  // This also serves as a check whether all trees in the set are compatible with each other,
178  // as average_branch_length_tree() throws if the trees have different topologies.
179  // Then, turn the resulting tree into a MassTree.
180  tree::TreeSet avg_tree_set;
181  for( auto const& smp : sample_set ) {
182  avg_tree_set.add( "", smp.sample.tree() );
183  }
184  auto const mass_tree = tree::convert_default_tree_to_mass_tree(
185  tree::average_branch_length_tree( avg_tree_set )
186  );
187  avg_tree_set.clear();
188  // TODO if we introduce an avg tree calculation that accepts an iterator, we do not need
189  // TODO to create a copied tree set of all trees here.
190 
191  // Prepare mass trees for all Samples, by copying the average mass tree.
192  // This massively speeds up the calculations (at the cost of extra storage for all the trees).
193  auto mass_trees = std::vector<tree::MassTree>( sample_set.size(), mass_tree );
194 
195  // Also, prepare a vector to store the pendant works.
196  auto pend_works = std::vector<double>( sample_set.size(), 0.0 );
197 
198  // Add the placement mass of each Sample to its MassTree.
199  #pragma omp parallel for schedule( dynamic )
200  for( size_t i = 0; i < sample_set.size(); ++i ) {
201  // Get the total sum of placement masses for the sample...
202  double const total_mass = total_placement_mass_with_multiplicities( sample_set[i].sample );
203 
204  // ... and use it as scaler to add the mass to the mass tree for this sample.
205  double const pend_work = add_sample_to_mass_tree(
206  sample_set[i].sample, +1.0, total_mass, mass_trees[i]
207  );
208 
209  // Also, store the pend work.
210  pend_works[ i ] = pend_work;
211  }
212 
213  return { std::move( mass_trees ), std::move( pend_works ) };
214 }
215 
216 // =================================================================================================
217 // Output
218 // =================================================================================================
219 
220 std::ostream& operator << (std::ostream& out, Sample const& smp)
221 {
222  auto table = utils::Table();
224 
225  table.add_column("#").justify(kRight);
226  table.add_column("name");
227  table.add_column("edge_num").justify(kRight);
228  table.add_column("likelihood").justify(kRight);
229  table.add_column("like_weight_ratio").justify(kRight);
230  table.add_column("proximal_length").justify(kRight);
231  table.add_column("pendant_length").justify(kRight);
232 
233  size_t i = 0;
234  for( auto const& pqry : smp.pqueries() ) {
235  std::string name = pqry.name_size() > 0 ? pqry.name_at(0).name : "";
236  if( pqry.name_size() > 1 ) {
237  name += " (+" + std::to_string( pqry.name_size() - 1 ) + ")";
238  }
239 
240  for( auto const& p : pqry.placements() ) {
241  table << std::to_string(i);
242  table << name;
243  table << std::to_string( p.edge_num() );
244  table << std::to_string( p.likelihood );
245  table << std::to_string( p.like_weight_ratio );
246  table << std::to_string( p.proximal_length );
247  table << std::to_string( p.pendant_length );
248  }
249 
250  ++i;
251  }
252 
253  out << utils::simple_layout()(table);
254  return out;
255 }
256 
257 std::string print_tree( Sample const& smp )
258 {
259  auto place_map = placements_per_edge( smp );
260 
261  auto print_line = [ &place_map ]( PlacementTreeNode const& node, PlacementTreeEdge const& edge )
262  {
263  return node.data<PlacementNodeData>().name
264  + " [" + std::to_string(
265  edge.data<PlacementEdgeData>().edge_num()
266  ) + "]" ": "
267  + std::to_string( place_map[ edge.index() ].size() ) + " placements";
268  };
269  return tree::PrinterCompact().print( smp.tree(), print_line );
270 }
271 
272 } // namespace placement
273 } // namespace genesis
std::vector< std::vector< PqueryPlacement const * > > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge...
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
TableLayout simple_layout(bool condensed)
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
Tree operator functions.
Print a Tree in a compact form, i.e., each node and edge on one line.
Definition: compact.hpp:82
Data class for MassTreeEdges. Stores the branch length and a list of masses with their positions alon...
Default class containing the commonly needed data for tree nodes.
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.cpp:128
Provides functions for working with Placements and Pqueries.
PlacementTree convert_default_tree_to_placement_tree(tree::DefaultTree const &source_tree)
Convert a DefaultTree into a PlacementTree.
std::string print_tree(Sample const &smp)
Return a simple view of the Tree of a Sample with information about the Pqueries on it...
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:373
void clear()
Clear the TreeSet and destroy all contained Trees.
Definition: tree_set.cpp:79
size_t index() const
Return the index of this Node.
Definition: node.cpp:48
std::pair< std::vector< tree::MassTree >, std::vector< double >> convert_sample_set_to_mass_trees(SampleSet const &sample_set)
Convert all Samples in a SampleSet to tree::MassTrees.
Default class containing the commonly needed data for tree edges.
MassTree convert_default_tree_to_mass_tree(DefaultTree const &source)
Helper function that takes a DefaultTree (or any Tree with Node and Edge data derived from it) and tu...
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
void reset_edge_nums(PlacementTree &tree)
Reset all edge nums of a PlacementTree.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.cpp:101
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.cpp:105
static std::unique_ptr< PlacementEdgeData > create()
Data class for PlacementTreeNodes. Stores a node name.
double branch_length
Branch length of the edge.
Tree average_branch_length_tree(TreeSet const &tset)
Return a Tree where the branch lengths are the average of the Trees in the TreeSet, given that they all have the same topology.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
std::string name
Name of the node.
NodeDataType & data()
Definition: node.hpp:108
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.cpp:85
std::pair< tree::MassTree, double > convert_sample_to_mass_tree(Sample const &sample)
Convert a Sample to a tree::MassTree.
static std::unique_ptr< PlacementNodeData > create()
Header of Tree class.
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
EdgeDataType & data()
Definition: edge.hpp:118
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
void print(std::ostream &out, Tree const &tree, std::function< std::string(TreeNode const &node, TreeEdge const &edge)> const print_line)
Print a compact representation of a Tree to an output stream, using a given function for output of th...
Definition: compact.cpp:49
int edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.