A toolkit for working with phylogenetic data.
v0.24.0
placement/function/emd.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
39 
46 #include "genesis/tree/tree.hpp"
47 
49 
50 #include <cassert>
51 
52 #ifdef GENESIS_OPENMP
53 # include <omp.h>
54 #endif
55 
56 namespace genesis {
57 namespace placement {
58 
59 // =================================================================================================
60 // Earth Movers Distance
61 // =================================================================================================
62 
63 // -------------------------------------------------------------------------
64 // EMD between two Samples
65 // -------------------------------------------------------------------------
66 
68  Sample const& lhs,
69  Sample const& rhs,
70  double const p,
71  bool const with_pendant_length
72 ) {
73  // Get a tree with the average branch lengths of both provided trees.
74  // This function also throws in case the trees have different topologies.
75  tree::TreeSet tset;
76  tset.add( lhs.tree(), "lhs" );
77  tset.add( rhs.tree(), "rhs" );
78  auto const avg_length_tree = tree::average_branch_length_tree( tset );
79 
80  // Create an EMD tree from the average branch length tree, then calc the EMD.
81  auto mass_tree = tree::convert_common_tree_to_mass_tree( avg_length_tree );
82 
83  // Use the sum of masses as normalization factor for the masses.
84  double totalmass_l = total_placement_mass_with_multiplicities( lhs );
85  double totalmass_r = total_placement_mass_with_multiplicities( rhs );
86 
87  // Copy masses of both samples to the EMD tree, with different signs.
88  double const pendant_work_l = add_sample_to_mass_tree( lhs, +1.0, totalmass_l, mass_tree );
89  double const pendant_work_r = add_sample_to_mass_tree( rhs, -1.0, totalmass_r, mass_tree );
90 
91  // Calculate EMD.
92  double work = tree::earth_movers_distance( mass_tree, p ).first;
93 
94  // If we also want the amount of work that was needed to move the placement masses from their
95  // pendant position to the branch, we need to add those values.
96  if( with_pendant_length ) {
97  work += pendant_work_l + pendant_work_r;
98  }
99 
100  return work;
101 }
102 
103 // -------------------------------------------------------------------------
104 // EMD matrix for a SampleSet
105 // -------------------------------------------------------------------------
106 
108  SampleSet const& sample_set,
109  double const p,
110  bool const with_pendant_length
111 ) {
112  // Get mass tress and the pendant work that was needed to create them.
113  auto const mass_trees = convert_sample_set_to_mass_trees( sample_set, true );
114 
115  // Calculate the pairwise distance matrix.
116  auto result = tree::earth_movers_distance( mass_trees.first, p );
117 
118  // If needed, add the pend work for each matrix position.
119  if( with_pendant_length ) {
120  assert( mass_trees.second.size() == sample_set.size() );
121  for( size_t i = 0; i < sample_set.size(); ++i ) {
122  for( size_t j = 0; j < sample_set.size(); ++j ) {
123  result( i, j ) += mass_trees.second[ i ] + mass_trees.second[ j ];
124  }
125  }
126  }
127 
128  return result;
129 }
130 
131 } // namespace placement
132 } // namespace genesis
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.hpp:234
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
CommonTree functions.
double earth_movers_distance(MassTree const &lhs, MassTree const &rhs, double const p)
Calculate the earth mover&#39;s distance of two distributions of masses on a given Tree.
Provides functions for working with Placements and Pqueries.
Tree average_branch_length_tree(std::vector< Tree > const &tset)
Return a Tree where the branch lengths are the average of the Trees in the given vector of Trees or T...
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Store a set of Samples with associated names.
Definition: sample_set.hpp:54
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
double earth_movers_distance(Sample const &lhs, Sample const &rhs, double const p, bool const with_pendant_length)
Calculate the earth mover&#39;s distance between two Samples.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
Header of Tree class.
MassTree convert_common_tree_to_mass_tree(CommonTree const &source)
Helper function that takes a CommonTree (or any Tree with Node and Edge data derived from it) and tur...
std::pair< tree::TreeSet, std::vector< double > > convert_sample_set_to_mass_trees(SampleSet const &sample_set, bool normalize)
Convert all Samples in a SampleSet to tree::MassTrees.