50 #include <unordered_set>
65 std::vector<MassTree>
const& trees,
75 for(
auto const& tree : trees ) {
76 if( ! tree_data_is< MassTreeNodeData, MassTreeEdgeData >( tree ) ) {
77 throw std::invalid_argument(
78 "Tree is not a MassTree. Cannot calculate its balance data."
83 throw std::invalid_argument(
84 "Trees do not have identical topology. Cannot calculate their balance data."
93 throw std::invalid_argument(
94 "Pseudo-count summands in balance settings have to be non-negative numbers."
112 auto const sum = std::accumulate(
116 throw std::runtime_error(
117 "Cannot calculate balance data on trees with normalized masses."
125 auto tendencies = std::vector<double>( result.
tree.
edge_count(), 1.0 );
128 #pragma omp parallel for
129 for(
size_t c = 0; c < tendencies.size(); ++c ) {
152 for(
auto& e : raw ) {
162 assert( std::isfinite( tendencies[c] ));
163 assert( tendencies[c] >= 0.0 );
172 auto norms = std::vector<double>( result.
tree.
edge_count(), 1.0 );
179 assert( edge_masses_cpy.cols() == norms.size() );
180 for(
size_t r = 0; r < edge_masses_cpy.rows(); ++r ) {
181 utils::closure( edge_masses_cpy.row(r).begin(), edge_masses_cpy.row(r).end() );
185 #pragma omp parallel for
186 for(
size_t c = 0; c < norms.size(); ++c ) {
189 auto em_beg = edge_masses_cpy.col(c).begin();
190 auto em_end = edge_masses_cpy.col(c).end();
192 switch( settings.
norm ) {
218 assert( std::isfinite( norms[c] ));
219 assert( norms[c] >= 0.0 );
225 if( trees.size() > 1 ) {
232 assert( std::isfinite( tendencies[c] ) && tendencies[c] >= 0.0 );
233 assert( std::isfinite( norms[c] ) && norms[c] >= 0.0 );
241 bool found_zero_mass =
false;
243 assert( std::isfinite( em ) && em >= 0.0 );
245 found_zero_mass =
true;
255 throw std::runtime_error(
256 "The balance data contains edge masses with value 0, but no pseudo counts were used. "
257 "As such 0 values cannot be used in the balances calculations, you must add pseudo "
258 "counts to them, e.g., via the BalanceSettings."
272 assert( trees.size() > 1 );
280 #pragma omp parallel for
287 assert( std::isfinite( edge_mass ) && ( edge_mass > 0.0 ));
290 assert( std::isfinite( taxon_weight ) && taxon_weight >= 0.0 );
309 if( taxon_weight == 0.0 ) {
322 edge_mass /= taxon_weight;
324 assert( std::isfinite( edge_mass ) && ( edge_mass > 0.0 ));
343 auto const result =
mass_balance_data( std::vector<MassTree>{ tree }, settings );
346 assert( result.edge_masses.rows() == 1 );
347 assert( result.edge_masses.cols() == tree.
edge_count() );
348 assert( result.taxon_weights.empty() );
359 std::unordered_set<size_t>
const& numerator_edge_indices,
360 std::unordered_set<size_t>
const& denominator_edge_indices
364 #pragma omp parallel for
366 result[r] =
mass_balance( data, numerator_edge_indices, denominator_edge_indices, r );
374 std::unordered_set<size_t>
const& numerator_edge_indices,
375 std::unordered_set<size_t>
const& denominator_edge_indices,
380 throw std::runtime_error(
381 "Invalid BalanceData: Cannot calculate balance of an empty tree."
385 throw std::runtime_error(
386 "Invalid BalanceData: Taxon weights need to have same size as edge masses."
390 throw std::runtime_error(
391 "Invalid BalanceData: Edge masses need to have same size as the Tree has edges."
394 if( numerator_edge_indices.empty() || denominator_edge_indices.empty() ) {
395 throw std::invalid_argument(
"Cannot calculate mass balance of empty edge sets." );
398 throw std::invalid_argument(
399 "Cannot calculate mass balance with tree index greater than number of trees."
414 auto calc_mass_mean_and_scaling_ = [ &data, &tree_index ](
415 std::unordered_set<size_t>
const& indices
418 std::vector<double> sub_masses;
419 std::vector<double> sub_weights;
420 sub_masses.reserve( indices.size() );
421 sub_weights.reserve( indices.size() );
424 for(
auto idx : indices ) {
428 throw std::runtime_error(
"Invalid edge index in mass balance calculation." );
431 assert( std::isfinite( data.
edge_masses( tree_index, idx )));
432 assert( data.
edge_masses( tree_index, idx ) >= 0.0 );
433 sub_masses.push_back( data.
edge_masses( tree_index, idx ));
437 sub_weights.push_back( 1.0 );
444 assert( sub_masses.size() == indices.size() );
445 assert( sub_weights.size() == indices.size() );
450 double scaling_n = std::accumulate( sub_weights.begin(), sub_weights.end(), 0.0 );
451 assert( std::isfinite( scaling_n ));
453 if( scaling_n > 0.0 ) {
455 assert( std::isfinite( geom_mean ));
457 geom_mean = std::numeric_limits<double>::quiet_NaN();
466 return BalanceTerms{ geom_mean, scaling_n };
470 auto const num = calc_mass_mean_and_scaling_( numerator_edge_indices );
471 auto const den = calc_mass_mean_and_scaling_( denominator_edge_indices );
472 assert( std::isnan( num.mean ) || ( num.mean > 0.0 ));
473 assert( std::isnan( den.mean ) || ( den.mean > 0.0 ));
476 double const scaling = std::sqrt(( num.scaling * den.scaling ) / ( num.scaling + den.scaling ));
477 double const balance = scaling * std::log( num.mean / den.mean );
478 assert( std::isnan( balance ) || std::isfinite( balance ));