A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
epca.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_PLACEMENT_FUNCTION_EPCA_H_
2 #define GENESIS_PLACEMENT_FUNCTION_EPCA_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <vector>
38 
39 namespace genesis {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
45 namespace tree {
46 
47  class Tree;
48 
49  class DefaultNodeData;
50  class DefaultEdgeData;
51 
52  using DefaultTree = Tree;
53 }
54 
55 namespace placement {
56  class PlacementEdgeData;
57  class PlacementNodeData;
58 
59  class Sample;
60  class SampleSet;
61 }
62 
63 namespace placement {
64 
65 // =================================================================================================
66 // Edge PCA
67 // =================================================================================================
68 
76 struct EpcaData
77 {
78  std::vector<double> eigenvalues;
79  utils::Matrix<double> eigenvectors;
80  utils::Matrix<double> projection;
81  std::vector<size_t> edge_indices;
82 };
83 
106 std::vector<double> epca_imbalance_vector( Sample const& sample, bool normalize = true );
107 
133 utils::Matrix<double> epca_imbalance_matrix(
134  SampleSet const& samples,
135  bool include_leaves = false,
136  bool normalize = true
137 );
138 
155 void epca_splitify_transform(
156  utils::Matrix<double>& imbalance_matrix,
157  double kappa = 1.0
158 );
159 
174 std::vector<size_t> epca_filter_constant_columns(
175  utils::Matrix<double>& imbalance_matrix,
176  double epsilon = 1e-5
177 );
178 
190 EpcaData epca(
191  SampleSet const& samples,
192  double kappa = 1.0,
193  double epsilon = 1e-5,
194  size_t components = 0
195 );
196 
197 } // namespace placement
198 } // namespace genesis
199 
200 #endif // include guard
Tree DefaultTree
Alias for a Tree with data types DefaultNodeData and DefaultEdgeData.
utils::Matrix< double > epca_imbalance_matrix(SampleSet const &samples, bool include_leaves, bool normalize)
Calculate the imbalance matrix of placement mass for all Samples in a SampleSet.
Definition: epca.cpp:166
EpcaData epca(SampleSet const &samples, double kappa, double epsilon, size_t components)
Perform EdgePCA on a SampleSet.
Definition: epca.cpp:313
utils::Matrix< double > eigenvectors
Definition: epca.hpp:79
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
Helper structure that collects the output of epca().
Definition: epca.hpp:76
std::vector< double > eigenvalues
Definition: epca.hpp:78
void normalize(Histogram &h, double total)
Definition: operations.cpp:61
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
std::vector< double > epca_imbalance_vector(Sample const &sample, bool normalize)
Calculate the imbalance of placement mass for each Edge of the given Sample.
Definition: epca.cpp:64
utils::Matrix< double > projection
Definition: epca.hpp:80
void epca_splitify_transform(utils::Matrix< double > &imbalance_matrix, double kappa)
Perform a component-wise transformation of the imbalance matrix used for epca().
Definition: epca.cpp:248
std::vector< size_t > epca_filter_constant_columns(utils::Matrix< double > &imbalance_matrix, double epsilon)
Filter out columns that have nearly constant values, measured using an epsilon.
Definition: epca.cpp:279
std::vector< size_t > edge_indices
Definition: epca.hpp:81