56 return accumulate_centroid_masses_;
61 accumulate_centroid_masses_ = value;
68 void MassTreeKmeans::pre_loop_hook(
69 std::vector<Point>
const& data,
76 if( accumulate_centroid_masses_ == 1 ) {
77 for(
auto& centroid : centroids ) {
80 }
else if( accumulate_centroid_masses_ > 1 ) {
81 for(
auto& centroid : centroids ) {
87 bool MassTreeKmeans::data_validation( std::vector<Point>
const& data )
const 90 for(
auto const& tree : data ) {
91 if( ! tree_data_is< MassTreeNodeData, MassTreeEdgeData >( tree ) ) {
92 throw std::invalid_argument(
"Trees for Kmeans do not have MassTree data types." );
98 for (
size_t i = 1; i < data.size(); i++) {
100 throw std::invalid_argument(
"Trees for Kmeans do not have identical topologies." );
107 void MassTreeKmeans::update_centroids(
108 std::vector<Point>
const& data,
109 std::vector<size_t>
const& assignments,
110 std::vector<Point>& centroids
113 auto const k = centroids.size();
116 #pragma omp parallel for 117 for(
size_t c = 0; c < k; ++c ) {
123 assert( data.size() == assignments.size() );
125 #ifdef GENESIS_OPENMP 128 #pragma omp parallel for 129 for(
size_t c = 0; c < k; ++c ) {
132 auto& centroid = centroids[ c ];
136 for(
size_t d = 0; d < data.size(); ++d ) {
139 assert( assignments[ d ] < k );
142 if( assignments[ d ] != c ) {
158 if( accumulate_centroid_masses_ == 1 ) {
160 }
else if( accumulate_centroid_masses_ > 1 ) {
171 auto counts = std::vector<size_t>( k, 0 );
174 for(
size_t d = 0; d < data.size(); ++d ) {
177 assert( assignments[ d ] < k );
181 ++counts[ assignments[ d ] ];
185 for(
size_t c = 0; c < k; ++c ) {
194 if( accumulate_centroid_masses_ == 1 ) {
196 }
else if( accumulate_centroid_masses_ > 1 ) {
205 double MassTreeKmeans::distance(
Point const& lhs,
Point const& rhs )
const std::vector< MassTree > const & centroids() const
double mass_tree_center_masses_on_branches_averaged(MassTree &tree)
Accumulate all masses of a MassTree at the average mass position per edge.
double earth_movers_distance(MassTree const &lhs, MassTree const &rhs, double const p)
Calculate the earth mover's distance of two distributions of masses on a given Tree.
double mass_tree_sum_of_masses(MassTree const &tree)
Return the total sum of all masses on the MassTree.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::vector< size_t > const & assignments() const
Class for representing phylogenetic trees.
void mass_tree_merge_trees_inplace(MassTree &lhs, MassTree const &rhs, double const scaler_lhs, double const scaler_rhs)
Merge all masses of two MassTrees by adding them to the first MassTree.
size_t accumulate_centroid_masses() const
void mass_tree_clear_masses(MassTree &tree)
Clear all masses of a MassTree, while keeping its topology.
bool identical_topology(Tree const &lhs, Tree const &rhs, bool identical_indices)
Return whether both trees have an identical topology.
void mass_tree_normalize_masses(MassTree &tree)
Scale all masses of a MassTree so that they sum up to 1.0.
double mass_tree_binify_masses(MassTree &tree, size_t number_of_bins)
Accumulate all masses of a MassTree into bins on each branch.
bool almost_equal_relative(double lhs, double rhs, double max_rel_diff=std::numeric_limits< double >::epsilon())
Check whether two doubles are almost equal, using a relative epsilon to compare them.