57 void SquashClustering::init_( std::vector<MassTree>&& trees )
63 clusters_.resize( trees.size() );
64 for(
size_t i = 0; i < trees.size(); ++i ) {
65 clusters_[i].tree = std::move( trees[i] );
66 clusters_[i].count = 1;
67 clusters_[i].active =
true;
75 for(
size_t i = 0; i < clusters_.size(); ++i ) {
79 clusters_[i].distances.resize( i );
82 #pragma omp parallel for
83 for(
size_t k = 0; k < i; ++k ) {
85 clusters_[i].distances[k] = dist;
95 std::pair<size_t, size_t> SquashClustering::min_entry_()
const
100 double min_d = std::numeric_limits<double>::max();
103 #pragma omp parallel for schedule(dynamic)
104 for(
size_t i = 0; i < clusters_.size(); ++i ) {
105 if( ! clusters_[i].active ) {
110 assert( clusters_[i].distances.size() == i );
111 for(
size_t j = 0; j < i; ++j ) {
112 if( ! clusters_[j].active ) {
117 #pragma omp flush( min_d )
118 if( clusters_[i].distances[j] < min_d ) {
119 #pragma omp critical( GENESIS_TREE_MASS_TREE_SQUASH_CLUSTERING_MIN_ENTRY_UPDATE )
121 if( clusters_[i].distances[j] < min_d ) {
124 min_d = clusters_[i].distances[j];
132 assert( min_i > min_j );
133 return { min_j, min_i };
136 void SquashClustering::merge_clusters_(
size_t i,
size_t j )
139 assert( i < clusters_.size() && j < clusters_.size() );
140 assert( i < clusters_[j].distances.size() );
143 clusters_.emplace_back();
144 auto& new_cluster = clusters_.back();
147 auto const weight_i =
static_cast<double>( clusters_[i].count );
148 auto const weight_j =
static_cast<double>( clusters_[j].count );
150 clusters_[i].tree, clusters_[j].tree, weight_i, weight_j
157 assert( clusters_.size() > 0 );
172 new_cluster.count = clusters_[i].count + clusters_[j].count;
173 new_cluster.active =
true;
174 new_cluster.distances.resize( clusters_.size() - 1, 0.0 );
179 #pragma omp parallel for schedule(dynamic)
180 for(
size_t k = 0; k < clusters_.size() - 1; ++k ) {
181 if( ! clusters_[k].active ) {
186 new_cluster.distances[k] = dist;
191 mergers_.push_back({ i, new_cluster.distances[i], j, new_cluster.distances[j] });
200 clusters_[i].active =
false;
201 clusters_[j].active =
false;
204 clusters_[i].distances.clear();
205 clusters_[j].distances.clear();
208 clusters_[i].tree.clear();
209 clusters_[j].tree.clear();
215 auto const tree_count = trees.size();
222 init_( std::move( trees ) );
225 for(
size_t i = 0; i < tree_count - 1; ++i ) {
231 auto const min_pair = min_entry_();
232 assert( min_pair.first < min_pair.second );
234 merge_clusters_( min_pair.first, min_pair.second );
239 size_t count_active = 0;
240 for(
auto const& c : clusters_ ) {
249 assert( tree_count + mergers_.size() == clusters_.size() );
254 if(
labels.size() != clusters_.size() - mergers_.size() ) {
255 throw std::runtime_error(
256 "List of labels does not have the correct size for the number of squash cluster elements."
261 for(
size_t i = 0; i < mergers_.size(); ++i ) {
262 auto const& cm = mergers_[i];
264 auto node_a = list[ cm.index_a ] +
":" +
std::to_string( cm.distance_a );
265 auto node_b = list[ cm.index_b ] +
":" +
std::to_string( cm.distance_b );
267 list[ cm.index_a ].clear();
268 list[ cm.index_b ].clear();
273 return list.back() +
";";