74 size_t const histogram_bins
78 throw std::runtime_error(
"Tree is empty. Cannot use Node Histogram Distance." );
81 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
84 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
93 #pragma omp parallel for
94 for(
size_t node_idx = 0; node_idx <
node_count; ++node_idx ) {
100 for(
size_t col_idx = 0; col_idx <
node_count; ++col_idx ) {
103 if( col_idx == node_idx ) {
109 auto const dist = node_distances( node_idx, col_idx );
112 if( node_sides( node_idx, col_idx ) == 1 && dist > max ) {
117 if( node_sides( node_idx, col_idx ) == -1 && dist > min ) {
123 if( min == 0.0 && max == 0.0 ) {
124 throw std::runtime_error(
125 "Tree only has branch lengths with value 0. Cannot use Node Histogram Distance."
129 histogram_set.
histograms[ node_idx ].min = -min;
130 histogram_set.
histograms[ node_idx ].max = max;
131 histogram_set.
histograms[ node_idx ].bins = std::vector<double>( histogram_bins, 0.0 );
134 return histogram_set;
153 throw std::runtime_error(
"Number of histograms does not equal number of tree nodes." );
156 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
159 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
166 for(
auto const& pquery : sample ) {
169 for(
auto const& placement : pquery.placements() ) {
170 auto& place = placements[pcnt];
172 place.edge_index = placement.edge().index();
173 place.primary_node_index = placement.edge().primary_node().index();
174 place.secondary_node_index = placement.edge().secondary_node().index();
178 place.pendant_length = placement.pendant_length;
179 place.proximal_length = placement.proximal_length;
183 place.like_weight_ratio = placement.like_weight_ratio * mult;
190 for(
size_t node_index = 0; node_index < sample.tree().node_count(); ++node_index ) {
193 auto& histogram = histogram_set.
histograms[ node_index ];
194 const auto min = histogram.min;
195 const auto max = histogram.max;
196 const auto bins = histogram.bins.size();
197 const double bin_width = ( max - min ) / static_cast<double>( bins );
201 for(
auto const& placement : placements ) {
204 double const p_dist = placement.proximal_length
205 + node_distances( node_index, placement.primary_node_index )
207 double const d_dist = placement.branch_length
208 - placement.proximal_length
209 + node_distances( node_index, placement.secondary_node_index )
211 double const dist = std::min( p_dist, d_dist );
216 auto const side = node_sides( node_index, placement.primary_node_index );
217 double const sign = ( side == 1 ? 1.0 : -1.0 );
220 auto const x = sign * dist;
224 }
else if( x >= max ) {
227 bin =
static_cast<size_t>(( x - min ) / bin_width );
228 assert( bin < bins );
232 histogram.bins[ bin ] += placement.like_weight_ratio;
233 sum += placement.like_weight_ratio;
237 for(
auto& val : histogram.bins ) {
255 size_t const histogram_bins
259 sample.
tree(), node_distances, node_sides, histogram_bins
274 throw std::runtime_error(
275 "Cannot calculate distance between NodeDistanceHistograms of different dimensions."
280 auto entries = std::vector<double>( lhs.
bins.size(), 0.0 );
282 const double bin_width = ( lhs.
max - lhs.
min ) / static_cast<double>( lhs.
bins.size() );
285 for(
size_t i = 0; i < lhs.
bins.size() - 1; ++i) {
286 entries[i + 1] = lhs.
bins[i] + entries[i] - rhs.
bins[i];
287 result += std::abs( entries[i + 1] ) * bin_width;
298 throw std::runtime_error(
299 "Cannot calculate distance between NodeDistanceHistogramSets of different size."
306 for(
size_t k = 0; k < lhs.
histograms.size(); ++k ) {
309 assert( dist >= 0.0 );
312 dist /=
static_cast< double >( lhs.
histograms.size() );
317 std::vector<NodeDistanceHistogramSet>
const& histogram_sets
320 auto const set_size = histogram_sets.size();
324 #ifdef GENESIS_OPENMP
331 #pragma omp parallel for
332 for(
size_t k = 0; k < max_k; ++k ) {
336 auto const i = ij.first;
337 auto const j = ij.second;
349 for(
size_t i = 0; i < set_size; ++i ) {
352 for(
size_t j = i + 1; j < set_size; ++j ) {
376 size_t const histogram_bins
386 sample.
tree(), node_distances, node_sides, histogram_bins
395 size_t const histogram_bins
398 throw std::invalid_argument(
"Incompatible trees." );
407 assert( hist_vec_a.histograms.size() == sample_a.
tree().
node_count() );
408 assert( hist_vec_b.histograms.size() == sample_b.
tree().
node_count() );
409 assert( hist_vec_a.histograms.size() == hist_vec_b.histograms.size() );
423 size_t const histogram_bins
425 auto const set_size = sample_set.
size();
428 if( set_size == 0 ) {
438 sample_set[ 0 ].sample.tree(), node_distances, node_sides, histogram_bins
440 auto result = std::vector<NodeDistanceHistogramSet>( set_size, empty_hist );
443 #pragma omp parallel for schedule(dynamic)
444 for(
size_t i = 0; i < set_size; ++i ) {
449 if( !
compatible_trees( sample_set[ i - 1 ].sample, sample_set[ i ].sample )) {
450 throw std::invalid_argument(
451 "Trees in SampleSet not compatible for calculating Node Histogram Distance."
458 sample_set[ i ].sample, node_distances, node_sides, result[ i ]
460 assert( result[ i ].histograms.size() == sample_set[ i ].sample.tree().node_count() );
468 size_t const histogram_bins
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
std::vector< double > bins
utils::Matrix< signed char > node_root_direction_matrix(Tree const &tree)
Calculate a Matrix that indicates the nodes on the root side of a given node.
std::vector< NodeDistanceHistogram > histograms
void fill_node_distance_histogram_set(Sample const &sample, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, NodeDistanceHistogramSet &histogram_set)
Fill the placements of a Sample into Histograms.
Provides functions for working with Placements and Pqueries.
NodeDistanceHistogramSet node_distance_histogram_set(Sample const &sample, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, size_t const histogram_bins)
Calcualte the NodeDistanceHistogramSet representing a single Sample, given the necessary matrices of ...
double sum(const Histogram &h)
double node_histogram_distance(NodeDistanceHistogram const &lhs, NodeDistanceHistogram const &rhs)
size_t node_count() const
Return the number of TreeNodes of the Tree.
Collection of NodeDistanceHistograms that describes one Sample.
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
Class for representing phylogenetic trees.
size_t triangular_size(size_t n)
Calculate the number of linear indices needed for a triangular Matrix of size n.
utils::Matrix< double > node_branch_length_distance_matrix(Tree const &tree)
Return a distance matrix containing pairwise distances between all Nodes, using the branch_length of ...
bool empty() const
Return whether the Tree is empty (i.e., has no nodes, edges and links).
Store a set of Samples with associated names.
Header of Default Tree distance methods.
Header of PqueryPlain class.
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Simple histogram data structure with equal sized bins.
double branch_length
Branch length of the edge.
std::pair< size_t, size_t > triangular_indices(size_t k, size_t n)
Given a linear index in a upper triangular Matrix, find the corresponding Matrix indices.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
size_t node_count(Tree const &tree)
Return the number of Nodes of a Tree. Same as Tree::node_count().
Header of Tree distance methods.
NodeDistanceHistogramSet make_empty_node_distance_histogram_set(tree::Tree const &tree, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, size_t const histogram_bins)
Create a set of Histograms without any weights for a given Tree.
size_t total_placement_count(Sample const &smp)
Get the total number of PqueryPlacements in all Pqueries of the given Sample.