A library for working with phylogenetic and population genetic data.
v0.32.0
placement/function/emd.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
39 
46 #include "genesis/tree/tree.hpp"
47 
49 
50 #include <cassert>
51 
52 #ifdef GENESIS_OPENMP
53 # include <omp.h>
54 #endif
55 
56 namespace genesis {
57 namespace placement {
58 
59 // =================================================================================================
60 // Earth Movers Distance
61 // =================================================================================================
62 
63 // -------------------------------------------------------------------------
64 // EMD between two Samples
65 // -------------------------------------------------------------------------
66 
68  Sample const& lhs,
69  Sample const& rhs,
70  double const p,
71  bool const with_pendant_length
72 ) {
73  // Get a tree with the average branch lengths of both provided trees.
74  // This function also throws in case the trees have different topologies.
75  tree::TreeSet tset;
76  tset.add( lhs.tree(), "lhs" );
77  tset.add( rhs.tree(), "rhs" );
78  auto const avg_length_tree = tree::average_branch_length_tree( tset );
79 
80  // Create an EMD tree from the average branch length tree, then calc the EMD.
81  auto mass_tree = tree::convert_common_tree_to_mass_tree( avg_length_tree );
82 
83  // Use the sum of masses as normalization factor for the masses.
84  double totalmass_l = total_placement_mass_with_multiplicities( lhs );
85  double totalmass_r = total_placement_mass_with_multiplicities( rhs );
86 
87  // Copy masses of both samples to the EMD tree, with different signs.
88  double const pendant_work_l = add_sample_to_mass_tree( lhs, +1.0, totalmass_l, mass_tree );
89  double const pendant_work_r = add_sample_to_mass_tree( rhs, -1.0, totalmass_r, mass_tree );
90 
91  // Calculate EMD.
92  double work = tree::earth_movers_distance( mass_tree, p ).first;
93 
94  // If we also want the amount of work that was needed to move the placement masses from their
95  // pendant position to the branch, we need to add those values.
96  if( with_pendant_length ) {
97  work += pendant_work_l + pendant_work_r;
98  }
99 
100  return work;
101 }
102 
103 // -------------------------------------------------------------------------
104 // EMD matrix for a SampleSet
105 // -------------------------------------------------------------------------
106 
108  SampleSet const& sample_set,
109  double const p,
110  bool const with_pendant_length
111 ) {
112  // Get mass tress and the pendant work that was needed to create them.
113  auto const mass_trees = convert_sample_set_to_mass_trees( sample_set, true );
114 
115  // Calculate the pairwise distance matrix.
116  auto result = tree::earth_movers_distance( mass_trees.first, p );
117 
118  // If needed, add the pend work for each matrix position.
119  if( with_pendant_length ) {
120  assert( mass_trees.second.size() == sample_set.size() );
121  for( size_t i = 0; i < sample_set.size(); ++i ) {
122  for( size_t j = 0; j < sample_set.size(); ++j ) {
123  result( i, j ) += mass_trees.second[ i ] + mass_trees.second[ j ];
124  }
125  }
126  }
127 
128  return result;
129 }
130 
131 } // namespace placement
132 } // namespace genesis
genesis::tree::TreeSet::add
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
genesis::placement::total_placement_mass_with_multiplicities
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
emd.hpp
genesis::placement::Sample::tree
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:124
genesis::placement::earth_movers_distance
double earth_movers_distance(Sample const &lhs, Sample const &rhs, double const p, bool const with_pendant_length)
Calculate the earth mover's distance between two Samples.
Definition: placement/function/emd.cpp:67
functions.hpp
CommonTree functions.
genesis::placement::SampleSet
Store a set of Samples with associated names.
Definition: sample_set.hpp:54
genesis::placement::Sample
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on.
Definition: sample.hpp:68
emd.hpp
tree.hpp
Header of Tree class.
tree_set.hpp
tree_set.hpp
sample_set.hpp
functions.hpp
Provides functions for working with Placements and Pqueries.
genesis::utils::Matrix< double >
genesis::placement::add_sample_to_mass_tree
double add_sample_to_mass_tree(Sample const &smp, double const sign, double const scaler, tree::MassTree &target)
Helper function to copy masses from a Sample to a MassTree.
Definition: placement/function/operators.cpp:132
genesis::tree::TreeSet
Definition: tree_set.hpp:48
tree.hpp
masses.hpp
matrix.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
operators.hpp
genesis::tree::convert_common_tree_to_mass_tree
MassTree convert_common_tree_to_mass_tree(CommonTree const &source)
Helper function that takes a CommonTree (or any Tree with Node and Edge data derived from it) and tur...
Definition: tree/mass_tree/tree.hpp:213
genesis::placement::SampleSet::size
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.hpp:234
genesis::tree::earth_movers_distance
double earth_movers_distance(MassTree const &lhs, MassTree const &rhs, double const p)
Calculate the earth mover's distance of two distributions of masses on a given Tree.
Definition: tree/mass_tree/emd.cpp:60
functions.hpp
genesis::placement::convert_sample_set_to_mass_trees
std::pair< tree::TreeSet, std::vector< double > > convert_sample_set_to_mass_trees(SampleSet const &sample_set, bool normalize)
Convert all Samples in a SampleSet to tree::MassTrees.
Definition: placement/function/operators.cpp:185
genesis::tree::average_branch_length_tree
Tree average_branch_length_tree(std::vector< Tree > const &tset)
Return a Tree where the branch lengths are the average of the Trees in the given vector of Trees or T...
Definition: tree/common_tree/functions.cpp:242
sample.hpp
helper.hpp