50 #include <unordered_set> 65 std::vector<MassTree>
const& trees,
75 for(
auto const& tree : trees ) {
76 if( ! tree_data_is< MassTreeNodeData, MassTreeEdgeData >( tree ) ) {
77 throw std::invalid_argument(
78 "Tree is not a MassTree. Cannot calculate its balance data." 83 throw std::invalid_argument(
84 "Trees do not have identical topology. Cannot calculate their balance data." 93 throw std::invalid_argument(
94 "Pseudo-count summands in balance settings have to be non-negative numbers." 112 auto const sum = std::accumulate(
116 throw std::runtime_error(
117 "Cannot calculate balance data on trees with normalized masses." 125 auto tendencies = std::vector<double>( result.
tree.
edge_count(), 1.0 );
128 #pragma omp parallel for 129 for(
size_t c = 0; c < tendencies.size(); ++c ) {
152 for(
auto& e : raw ) {
162 assert( std::isfinite( tendencies[c] ));
163 assert( tendencies[c] >= 0.0 );
172 auto norms = std::vector<double>( result.
tree.
edge_count(), 1.0 );
179 assert( edge_masses_cpy.cols() == norms.size() );
180 for(
size_t r = 0; r < edge_masses_cpy.rows(); ++r ) {
181 utils::closure( edge_masses_cpy.row(r).begin(), edge_masses_cpy.row(r).end() );
185 #pragma omp parallel for 186 for(
size_t c = 0; c < norms.size(); ++c ) {
189 auto em_beg = edge_masses_cpy.col(c).begin();
190 auto em_end = edge_masses_cpy.col(c).end();
192 switch( settings.
norm ) {
218 assert( std::isfinite( norms[c] ));
219 assert( norms[c] >= 0.0 );
225 if( trees.size() > 1 ) {
232 assert( std::isfinite( tendencies[c] ) && tendencies[c] >= 0.0 );
233 assert( std::isfinite( norms[c] ) && norms[c] >= 0.0 );
241 bool found_zero_mass =
false;
243 assert( std::isfinite( em ) && em >= 0.0 );
245 found_zero_mass =
true;
255 throw std::runtime_error(
256 "The balance data contains edge masses with value 0, but no pseudo counts were used. " 257 "As such 0 values cannot be used in the balances calculations, you must add pseudo " 258 "counts to them, e.g., via the BalanceSettings." 272 assert( trees.size() > 1 );
280 #pragma omp parallel for 287 assert( std::isfinite( edge_mass ) && ( edge_mass > 0.0 ));
290 assert( std::isfinite( taxon_weight ) && taxon_weight >= 0.0 );
309 if( taxon_weight == 0.0 ) {
322 edge_mass /= taxon_weight;
324 assert( std::isfinite( edge_mass ) && ( edge_mass > 0.0 ));
343 auto const result =
mass_balance_data( std::vector<MassTree>{ tree }, settings );
346 assert( result.edge_masses.rows() == 1 );
347 assert( result.edge_masses.cols() == tree.
edge_count() );
348 assert( result.taxon_weights.empty() );
359 std::unordered_set<size_t>
const& numerator_edge_indices,
360 std::unordered_set<size_t>
const& denominator_edge_indices
364 #pragma omp parallel for 366 result[r] =
mass_balance( data, numerator_edge_indices, denominator_edge_indices, r );
374 std::unordered_set<size_t>
const& numerator_edge_indices,
375 std::unordered_set<size_t>
const& denominator_edge_indices,
380 throw std::runtime_error(
381 "Invalid BalanceData: Cannot calculate balance of an empty tree." 385 throw std::runtime_error(
386 "Invalid BalanceData: Taxon weights need to have same size as edge masses." 390 throw std::runtime_error(
391 "Invalid BalanceData: Edge masses need to have same size as the Tree has edges." 394 if( numerator_edge_indices.empty() || denominator_edge_indices.empty() ) {
395 throw std::invalid_argument(
"Cannot calculate mass balance of empty edge sets." );
398 throw std::invalid_argument(
399 "Cannot calculate mass balance with tree index greater than number of trees." 414 auto calc_mass_mean_and_scaling_ = [ &data, &tree_index ](
415 std::unordered_set<size_t>
const& indices
418 std::vector<double> sub_masses;
419 std::vector<double> sub_weights;
420 sub_masses.reserve( indices.size() );
421 sub_weights.reserve( indices.size() );
424 for(
auto idx : indices ) {
428 throw std::runtime_error(
"Invalid edge index in mass balance calculation." );
431 assert( std::isfinite( data.
edge_masses( tree_index, idx )));
432 assert( data.
edge_masses( tree_index, idx ) >= 0.0 );
433 sub_masses.push_back( data.
edge_masses( tree_index, idx ));
437 sub_weights.push_back( 1.0 );
444 assert( sub_masses.size() == indices.size() );
445 assert( sub_weights.size() == indices.size() );
450 double scaling_n = std::accumulate( sub_weights.begin(), sub_weights.end(), 0.0 );
451 assert( std::isfinite( scaling_n ));
453 if( scaling_n > 0.0 ) {
455 assert( std::isfinite( geom_mean ));
457 geom_mean = std::numeric_limits<double>::quiet_NaN();
466 return BalanceTerms{ geom_mean, scaling_n };
470 auto const num = calc_mass_mean_and_scaling_( numerator_edge_indices );
471 auto const den = calc_mass_mean_and_scaling_( denominator_edge_indices );
472 assert( std::isnan( num.mean ) || ( num.mean > 0.0 ));
473 assert( std::isnan( den.mean ) || ( den.mean > 0.0 ));
476 double const scaling = std::sqrt(( num.scaling * den.scaling ) / ( num.scaling + den.scaling ));
477 double const balance = scaling * std::log( num.mean / den.mean );
478 assert( std::isnan( balance ) || std::isfinite( balance ));
double geometric_mean(ForwardIterator first, ForwardIterator last)
Calculate the geometric mean of a range of positive numbers.
double euclidean_norm(ForwardIterator first, ForwardIterator last)
Calculate the Euclidean norm (L2 norm) of a range of numbers.
Use the Aitchison norm of the relative abundances of the taxon.
double finite_minimum(ForwardIterator first, ForwardIterator last)
Return the minimum of a range of double values.
Use the geometric mean of the taxon masses.
Use the Euclidean norm of the relative abundances of the taxon.
Use the Maximum norm of the relative abundances of the taxon.
double manhattan_norm(ForwardIterator first, ForwardIterator last)
Calculate the Manhattan norm (L1 norm) of a range of numbers.
WeightTendency tendency
Set the term for asssing the central tendency of taxon masses for calculating the taxon weights...
WeightNorm norm
Set the term for the norm of relative abundances for calculating the taxon weights.
double aitchison_norm(ForwardIterator first, ForwardIterator last)
Calculate the Aitchison norm of a range of positive numbers.
BalanceData mass_balance_data(std::vector< MassTree > const &trees, BalanceSettings settings)
Calculate the data needed to calculate balances on MassTrees.
bool empty() const
Return whether the Tree is empty (i.e., has no nodes, edges and links).
utils::Matrix< double > raw_edge_masses
The absolute raw per-edge masses per original input Tree.
size_t edge_count() const
Return the number of TreeEdges of the Tree.
double median(const Histogram &h)
double sum(const Histogram &h)
double pseudo_count_summand_zeros
Set the constant that is added to taxon masses that are zero, to avoid zero counts.
Tree average_branch_length_tree(std::vector< Tree > const &tset)
Return a Tree where the branch lengths are the average of the Trees in the given vector of Trees or T...
Data needed to calculate balances.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Use no norm, that is, use 1.0 for this term.
Use no tendency, that is, use 1.0 for this term.
void closure(ForwardIterator first, ForwardIterator last)
Calculate the closure of a range of numbers.
std::vector< double > mass_tree_mass_per_edge(MassTree const &tree)
Return a std::vector that contains the total mass for each edge of the given MassTree.
double weighted_geometric_mean(ForwardIterator first_value, ForwardIterator last_value, ForwardIterator first_weight, ForwardIterator last_weight)
Calculate the weighted geometric mean of a range of positive numbers.
double mean(const Histogram &h)
Compute the bin-weighted arithmetic mean.
MatrixCol< self_type, value_type > col(size_t col)
Class for representing phylogenetic trees.
Settings to calculate balances and the Phylogenetic ILR Transform.
MatrixRow< self_type, value_type > row(size_t row)
double maximum_norm(ForwardIterator first, ForwardIterator last)
Calculate the Maximum norm (infinity norm) of a range of numbers.
double arithmetic_mean(ForwardIterator first, ForwardIterator last)
Calculate the arithmetic mean of a range of numbers.
CommonTree convert_to_common_tree(Tree const &source_tree)
Convert a Tree to a CommonTree with CommonNodeData and CommonEdgeData.
double pseudo_count_summand_all
Set the constant that is added to all taxon masses to avoid zero counts.
std::vector< double > mass_balance(BalanceData const &data, std::unordered_set< size_t > const &numerator_edge_indices, std::unordered_set< size_t > const &denominator_edge_indices)
Calculate the balance of edge masses between two sets of edges.
Use the Manhattan norm of the relative abundances of the taxon.
bool identical_topology(Tree const &lhs, Tree const &rhs, bool identical_indices)
Return whether both trees have an identical topology.
Use the arithmetic mean of the taxon masses.
Tree tree
The Tree on which to calculate balances.
std::vector< double > taxon_weights
The taxon/edge weights calculated from mulitple trees.
utils::Matrix< double > edge_masses
The relative per-edge masses per original input Tree.
Use the median of the taxon masses.
bool almost_equal_relative(double lhs, double rhs, double max_rel_diff=std::numeric_limits< double >::epsilon())
Check whether two doubles are almost equal, using a relative epsilon to compare them.