A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
placement/function/emd.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
39 
46 #include "genesis/tree/tree.hpp"
47 
49 
50 #include <cassert>
51 
52 #ifdef GENESIS_OPENMP
53 # include <omp.h>
54 #endif
55 
56 namespace genesis {
57 namespace placement {
58 
59 // =================================================================================================
60 // Earth Movers Distance
61 // =================================================================================================
62 
63 // -------------------------------------------------------------------------
64 // EMD between two Samples
65 // -------------------------------------------------------------------------
66 
68  Sample const& lhs,
69  Sample const& rhs,
70  double const p,
71  bool const with_pendant_length
72 ) {
73  // Get a tree with the average branch lengths of both provided trees.
74  // This function also throws in case the trees have different topologies.
75  tree::TreeSet tset;
76  tset.add( "lhs", lhs.tree() );
77  tset.add( "rhs", rhs.tree() );
78  auto const avg_length_tree = tree::average_branch_length_tree( tset );
79 
80  // Create an EMD tree from the average branch length tree, then calc the EMD.
81  auto mass_tree = tree::convert_default_tree_to_mass_tree( avg_length_tree );
82 
83  // Use the sum of masses as normalization factor for the masses.
84  double totalmass_l = total_placement_mass_with_multiplicities( lhs );
85  double totalmass_r = total_placement_mass_with_multiplicities( rhs );
86 
87  // Copy masses of both samples to the EMD tree, with different signs.
88  double const pendant_work_l = add_sample_to_mass_tree( lhs, +1.0, totalmass_l, mass_tree );
89  double const pendant_work_r = add_sample_to_mass_tree( rhs, -1.0, totalmass_r, mass_tree );
90 
91  // Calculate EMD.
92  double work = tree::earth_movers_distance( mass_tree, p ).first;
93 
94  // If we also want the amount of work that was needed to move the placement masses from their
95  // pendant position to the branch, we need to add those values.
96  if( with_pendant_length ) {
97  work += pendant_work_l + pendant_work_r;
98  }
99 
100  return work;
101 }
102 
103 // -------------------------------------------------------------------------
104 // EMD matrix for a SampleSet
105 // -------------------------------------------------------------------------
106 
108  SampleSet const& sample_set,
109  double const p,
110  bool const with_pendant_length
111 ) {
112  // Get mass tress and the pendant work that was needed to create them.
113  auto const mass_trees = convert_sample_set_to_mass_trees( sample_set );
114 
115  // Calculate the pairwise distance matrix.
116  auto result = tree::earth_movers_distance( mass_trees.first, p );
117 
118  // If needed, add the pend work for each matrix position.
119  if( with_pendant_length ) {
120  assert( mass_trees.second.size() == sample_set.size() );
121  for( size_t i = 0; i < sample_set.size(); ++i ) {
122  for( size_t j = 0; j < sample_set.size(); ++j ) {
123  result( i, j ) += mass_trees.second[ i ] + mass_trees.second[ j ];
124  }
125  }
126  }
127 
128  return result;
129 }
130 
131 } // namespace placement
132 } // namespace genesis
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
double earth_movers_distance(Sample const &lhs, Sample const &rhs, double const p, bool const with_pendant_length)
Calculate the earth mover's distance between two Samples.
Provides functions for working with Placements and Pqueries.
std::pair< std::vector< tree::MassTree >, std::vector< double >> convert_sample_set_to_mass_trees(SampleSet const &sample_set)
Convert all Samples in a SampleSet to tree::MassTrees.
MassTree convert_default_tree_to_mass_tree(DefaultTree const &source)
Helper function that takes a DefaultTree (or any Tree with Node and Edge data derived from it) and tu...
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.hpp:234
Default Tree functions.
Tree average_branch_length_tree(TreeSet const &tset)
Return a Tree where the branch lengths are the average of the Trees in the TreeSet, given that they all have the same topology.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
Header of Tree class.
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
double earth_movers_distance(MassTree const &lhs, MassTree const &rhs, double const p)
Calculate the earth mover's distance of two distributions of masses on a given Tree.