75 size_t const histogram_bins
79 throw std::runtime_error(
"Tree is empty. Cannot use Node Histogram Distance." );
82 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
85 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
94 #pragma omp parallel for
95 for(
size_t node_idx = 0; node_idx <
node_count; ++node_idx ) {
101 for(
size_t col_idx = 0; col_idx <
node_count; ++col_idx ) {
104 if( col_idx == node_idx ) {
110 auto const dist = node_distances( node_idx, col_idx );
113 if( node_sides( node_idx, col_idx ) == 1 && dist > max ) {
118 if( node_sides( node_idx, col_idx ) == -1 && dist > min ) {
124 if( min == 0.0 && max == 0.0 ) {
125 throw std::runtime_error(
126 "Tree only has branch lengths with value 0. Cannot use Node Histogram Distance."
130 histogram_set.
histograms[ node_idx ].min = -min;
131 histogram_set.
histograms[ node_idx ].max = max;
132 histogram_set.
histograms[ node_idx ].bins = std::vector<double>( histogram_bins, 0.0 );
135 return histogram_set;
154 throw std::runtime_error(
"Number of histograms does not equal number of tree nodes." );
157 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
160 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
167 for(
auto const& pquery : sample ) {
170 for(
auto const& placement : pquery.placements() ) {
171 auto& place = placements[pcnt];
173 place.edge_index = placement.edge().index();
174 place.primary_node_index = placement.edge().primary_node().index();
175 place.secondary_node_index = placement.edge().secondary_node().index();
179 place.pendant_length = placement.pendant_length;
180 place.proximal_length = placement.proximal_length;
184 place.like_weight_ratio = placement.like_weight_ratio * mult;
191 for(
size_t node_index = 0; node_index < sample.tree().node_count(); ++node_index ) {
194 auto& histogram = histogram_set.
histograms[ node_index ];
195 const auto min = histogram.min;
196 const auto max = histogram.max;
197 const auto bins = histogram.bins.size();
198 const double bin_width = ( max - min ) /
static_cast<double>( bins );
202 for(
auto const& placement : placements ) {
205 double const p_dist = placement.proximal_length
206 + node_distances( node_index, placement.primary_node_index )
208 double const d_dist = placement.branch_length
209 - placement.proximal_length
210 + node_distances( node_index, placement.secondary_node_index )
212 double const dist = std::min( p_dist, d_dist );
217 auto const side = node_sides( node_index, placement.primary_node_index );
218 double const sign = ( side == 1 ? 1.0 : -1.0 );
221 auto const x = sign * dist;
225 }
else if( x >= max ) {
228 bin =
static_cast<size_t>(( x - min ) / bin_width );
229 assert( bin < bins );
233 histogram.bins[ bin ] += placement.like_weight_ratio;
234 sum += placement.like_weight_ratio;
238 for(
auto& val : histogram.bins ) {
256 size_t const histogram_bins
260 sample.
tree(), node_distances, node_sides, histogram_bins
275 throw std::runtime_error(
276 "Cannot calculate distance between NodeDistanceHistograms of different dimensions."
281 auto entries = std::vector<double>( lhs.
bins.size(), 0.0 );
283 const double bin_width = ( lhs.
max - lhs.
min ) /
static_cast<double>( lhs.
bins.size() );
286 for(
size_t i = 0; i < lhs.
bins.size() - 1; ++i) {
287 entries[i + 1] = lhs.
bins[i] + entries[i] - rhs.
bins[i];
288 result += std::abs( entries[i + 1] ) * bin_width;
299 throw std::runtime_error(
300 "Cannot calculate distance between NodeDistanceHistogramSets of different size."
307 for(
size_t k = 0; k < lhs.
histograms.size(); ++k ) {
310 assert( dist >= 0.0 );
313 dist /=
static_cast< double >( lhs.
histograms.size() );
318 std::vector<NodeDistanceHistogramSet>
const& histogram_sets
321 auto const set_size = histogram_sets.size();
325 #ifdef GENESIS_OPENMP
332 #pragma omp parallel for
333 for(
size_t k = 0; k < max_k; ++k ) {
337 auto const i = ij.first;
338 auto const j = ij.second;
350 for(
size_t i = 0; i < set_size; ++i ) {
353 for(
size_t j = i + 1; j < set_size; ++j ) {
377 size_t const histogram_bins
387 sample.
tree(), node_distances, node_sides, histogram_bins
396 size_t const histogram_bins
399 throw std::invalid_argument(
"Incompatible trees." );
408 assert( hist_vec_a.histograms.size() == sample_a.
tree().
node_count() );
409 assert( hist_vec_b.histograms.size() == sample_b.
tree().
node_count() );
410 assert( hist_vec_a.histograms.size() == hist_vec_b.histograms.size() );
424 size_t const histogram_bins
426 auto const set_size = sample_set.
size();
429 if( set_size == 0 ) {
430 return std::vector<NodeDistanceHistogramSet>();
439 sample_set[ 0 ].tree(), node_distances, node_sides, histogram_bins
441 auto result = std::vector<NodeDistanceHistogramSet>( set_size, empty_hist );
444 #pragma omp parallel for schedule(dynamic)
445 for(
size_t i = 0; i < set_size; ++i ) {
451 throw std::invalid_argument(
452 "Trees in SampleSet not compatible for calculating Node Histogram Distance."
459 sample_set[ i ], node_distances, node_sides, result[ i ]
461 assert( result[ i ].histograms.size() == sample_set[ i ].tree().node_count() );
469 size_t const histogram_bins