A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
squash_clustering.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_MASS_TREE_SQUASH_CLUSTERING_H_
2 #define GENESIS_TREE_MASS_TREE_SQUASH_CLUSTERING_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <cstddef>
37 #include <string>
38 #include <utility>
39 #include <vector>
40 
41 namespace genesis {
42 namespace tree {
43 
44 // =================================================================================================
45 // Squash Clustering
46 // =================================================================================================
47 
52 {
53 public:
54 
55  // -------------------------------------------------------------------------
56  // Typedefs and Constants
57  // -------------------------------------------------------------------------
58 
59  struct Cluster
60  {
68 
75  size_t count;
76 
82  bool active;
83 
92  std::vector<double> distances;
93  };
94 
95  struct Merger
96  {
100  size_t index_a;
101 
105  double distance_a;
106 
110  size_t index_b;
111 
115  double distance_b;
116  };
117 
118  // -------------------------------------------------------------------------
119  // Data Members
120  // -------------------------------------------------------------------------
121 
122  std::vector<Cluster> clusters;
123  std::vector<Merger> mergers;
124 };
125 
135 SquashClustering squash_clustering( std::vector<MassTree>&& trees, double const p = 1.0 );
136 
144 std::string squash_cluster_tree( SquashClustering const& sc, std::vector<std::string> const& labels );
145 
146 } // namespace tree
147 } // namespace genesis
148 
149 #endif // include guard
std::vector< double > distances
Distances from this cluster to all clusters with a lower index in the clusters vector.
size_t index_a
Index of the first data point in the cluster.
MassTree tree
The MassTree that this cluster represents.
Result structure for Squash Clustering.
double distance_a
Distance of the first data point to the cluster node.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
double distance_b
Distance of the second data point to the cluster node.
size_t index_b
Index of the second data point in the cluster.
std::string squash_cluster_tree(SquashClustering const &sc, std::vector< std::string > const &labels)
Build a Newick-format tree for visualizing the result of squash_clustering().
size_t count
How many end points (Samples) does this cluster represent?
SquashClustering squash_clustering(std::vector< MassTree > &&trees, double const p)
Perform Squash Clustering.
bool active
Is this cluster active, i.e., is it not yet part of a larger cluster?
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:58