A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
squash_clustering.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_MASS_TREE_SQUASH_CLUSTERING_H_
2 #define GENESIS_TREE_MASS_TREE_SQUASH_CLUSTERING_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <cstddef>
37 #include <functional>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 namespace tree {
44 
45 // =================================================================================================
46 // Squash Clustering
47 // =================================================================================================
48 
55 {
56 public:
57 
58  // -------------------------------------------------------------------------
59  // Typedefs and Constants
60  // -------------------------------------------------------------------------
61 
62  struct Cluster
63  {
71 
78  size_t count;
79 
85  bool active;
86 
95  std::vector<double> distances;
96  };
97 
98  struct Merger
99  {
103  size_t index_a;
104 
108  double distance_a;
109 
113  size_t index_b;
114 
118  double distance_b;
119  };
120 
121  // -------------------------------------------------------------------------
122  // Constructor and Rule of Five
123  // -------------------------------------------------------------------------
124 
125  SquashClustering() = default;
126  ~SquashClustering() = default;
127 
128  SquashClustering( SquashClustering const& ) = default;
129  SquashClustering( SquashClustering&& ) = default;
130 
131  SquashClustering& operator= ( SquashClustering const& ) = default;
133 
134  // -------------------------------------------------------------------------
135  // Public Functions
136  // -------------------------------------------------------------------------
137 
147  void run( std::vector<MassTree>&& trees );
148 
156  std::string tree_string( std::vector<std::string> const& labels ) const;
157 
158  SquashClustering& p( double value )
159  {
160  p_ = value;
161  return *this;
162  }
163 
164  double p() const
165  {
166  return p_;
167  }
168 
169  std::vector<Cluster> const& clusters() const
170  {
171  return clusters_;
172  }
173 
174  std::vector<Merger> const& mergers() const
175  {
176  return mergers_;
177  }
178 
182  void clear();
183 
184  // -------------------------------------------------------------------------
185  // Progress Report
186  // -------------------------------------------------------------------------
187 
188  std::function<void( void )> report_initialization;
189  std::function<void( size_t i, size_t total )> report_step;
190 
191  // -------------------------------------------------------------------------
192  // Private Functions
193  // -------------------------------------------------------------------------
194 
195 private:
196 
197  void init_( std::vector<MassTree>&& trees );
198  std::pair<size_t, size_t> min_entry_() const;
199  void merge_clusters_( size_t i, size_t j );
200 
201  // -------------------------------------------------------------------------
202  // Data Members
203  // -------------------------------------------------------------------------
204 
205 private:
206 
207  double p_ = 1.0;
208 
209  std::vector<Cluster> clusters_;
210  std::vector<Merger> mergers_;
211 };
212 
213 } // namespace tree
214 } // namespace genesis
215 
216 #endif // include guard
std::vector< double > distances
Distances from this cluster to all clusters with a lower index in the clusters vector.
std::string tree_string(std::vector< std::string > const &labels) const
Build a Newick-format tree for visualizing the result of a squash_clustering().
size_t index_a
Index of the first data point in the cluster.
std::function< void(void)> report_initialization
std::function< void(size_t i, size_t total)> report_step
MassTree tree
The MassTree that this cluster represetns.
double distance_a
Distance of the first data point to the cluster node.
SquashClustering & p(double value)
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
double distance_b
Distance of the second data point to the cluster node.
size_t index_b
Index of the second data point in the cluster.
size_t count
How many end points (Samples) does this cluster represent?
std::vector< Cluster > const & clusters() const
Perform Squash Clustering.
bool active
Is this cluster active, i.e., is it not yet part of a larger cluster?
SquashClustering & operator=(SquashClustering const &)=default
std::unordered_set< std::string > labels(SequenceSet const &set)
Return a set of all labels of the SequenceSet.
Definition: labels.cpp:62
void run(std::vector< MassTree > &&trees)
Perfom Squash Clustering.
std::vector< Merger > const & mergers() const
void clear()
Clear the clusters() and mergers() data.