75 size_t const histogram_bins
79 throw std::runtime_error(
"Tree is empty. Cannot use Node Histogram Distance." );
82 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
85 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
94 #pragma omp parallel for 95 for(
size_t node_idx = 0; node_idx <
node_count; ++node_idx ) {
101 for(
size_t col_idx = 0; col_idx <
node_count; ++col_idx ) {
104 if( col_idx == node_idx ) {
110 auto const dist = node_distances( node_idx, col_idx );
113 if( node_sides( node_idx, col_idx ) == 1 && dist > max ) {
118 if( node_sides( node_idx, col_idx ) == -1 && dist > min ) {
124 if( min == 0.0 && max == 0.0 ) {
125 throw std::runtime_error(
126 "Tree only has branch lengths with value 0. Cannot use Node Histogram Distance." 130 histogram_set.
histograms[ node_idx ].min = -min;
131 histogram_set.
histograms[ node_idx ].max = max;
132 histogram_set.
histograms[ node_idx ].bins = std::vector<double>( histogram_bins, 0.0 );
135 return histogram_set;
154 throw std::runtime_error(
"Number of histograms does not equal number of tree nodes." );
157 throw std::runtime_error(
"Node Distance Matrix has wrong size." );
160 throw std::runtime_error(
"Node Sides Matrix has wrong size." );
167 for(
auto const& pquery : sample ) {
170 for(
auto const& placement : pquery.placements() ) {
171 auto& place = placements[pcnt];
173 place.edge_index = placement.edge().index();
174 place.primary_node_index = placement.edge().primary_node().index();
175 place.secondary_node_index = placement.edge().secondary_node().index();
179 place.pendant_length = placement.pendant_length;
180 place.proximal_length = placement.proximal_length;
184 place.like_weight_ratio = placement.like_weight_ratio * mult;
191 for(
size_t node_index = 0; node_index < sample.tree().node_count(); ++node_index ) {
194 auto& histogram = histogram_set.
histograms[ node_index ];
195 const auto min = histogram.min;
196 const auto max = histogram.max;
197 const auto bins = histogram.bins.size();
198 const double bin_width = ( max - min ) / static_cast<double>( bins );
202 for(
auto const& placement : placements ) {
205 double const p_dist = placement.proximal_length
206 + node_distances( node_index, placement.primary_node_index )
208 double const d_dist = placement.branch_length
209 - placement.proximal_length
210 + node_distances( node_index, placement.secondary_node_index )
212 double const dist = std::min( p_dist, d_dist );
217 auto const side = node_sides( node_index, placement.primary_node_index );
218 double const sign = ( side == 1 ? 1.0 : -1.0 );
221 auto const x = sign * dist;
225 }
else if( x >= max ) {
228 bin =
static_cast<size_t>(( x - min ) / bin_width );
229 assert( bin < bins );
233 histogram.bins[ bin ] += placement.like_weight_ratio;
234 sum += placement.like_weight_ratio;
238 for(
auto& val : histogram.bins ) {
256 size_t const histogram_bins
260 sample.
tree(), node_distances, node_sides, histogram_bins
275 throw std::runtime_error(
276 "Cannot calculate distance between NodeDistanceHistograms of different dimensions." 281 auto entries = std::vector<double>( lhs.
bins.size(), 0.0 );
283 const double bin_width = ( lhs.
max - lhs.
min ) / static_cast<double>( lhs.
bins.size() );
286 for(
size_t i = 0; i < lhs.
bins.size() - 1; ++i) {
287 entries[i + 1] = lhs.
bins[i] + entries[i] - rhs.
bins[i];
288 result += std::abs( entries[i + 1] ) * bin_width;
299 throw std::runtime_error(
300 "Cannot calculate distance between NodeDistanceHistogramSets of different size." 307 for(
size_t k = 0; k < lhs.
histograms.size(); ++k ) {
310 assert( dist >= 0.0 );
313 dist /=
static_cast< double >( lhs.
histograms.size() );
318 std::vector<NodeDistanceHistogramSet>
const& histogram_sets
321 auto const set_size = histogram_sets.size();
325 #ifdef GENESIS_OPENMP 332 #pragma omp parallel for 333 for(
size_t k = 0; k < max_k; ++k ) {
337 auto const i = ij.first;
338 auto const j = ij.second;
350 for(
size_t i = 0; i < set_size; ++i ) {
353 for(
size_t j = i + 1; j < set_size; ++j ) {
377 size_t const histogram_bins
387 sample.
tree(), node_distances, node_sides, histogram_bins
396 size_t const histogram_bins
399 throw std::invalid_argument(
"Incompatible trees." );
408 assert( hist_vec_a.histograms.size() == sample_a.
tree().
node_count() );
409 assert( hist_vec_b.histograms.size() == sample_b.
tree().
node_count() );
410 assert( hist_vec_a.histograms.size() == hist_vec_b.histograms.size() );
424 size_t const histogram_bins
426 auto const set_size = sample_set.
size();
429 if( set_size == 0 ) {
430 return std::vector<NodeDistanceHistogramSet>();
439 sample_set[ 0 ].tree(), node_distances, node_sides, histogram_bins
441 auto result = std::vector<NodeDistanceHistogramSet>( set_size, empty_hist );
444 #pragma omp parallel for schedule(dynamic) 445 for(
size_t i = 0; i < set_size; ++i ) {
451 throw std::invalid_argument(
452 "Trees in SampleSet not compatible for calculating Node Histogram Distance." 459 sample_set[ i ], node_distances, node_sides, result[ i ]
461 assert( result[ i ].histograms.size() == sample_set[ i ].tree().node_count() );
469 size_t const histogram_bins
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
static double node_histogram_distance(NodeDistanceHistogram const &lhs, NodeDistanceHistogram const &rhs)
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Header of CommonTree distance methods.
std::vector< double > bins
bool empty() const
Return whether the Tree is empty (i.e., has no nodes, edges and links).
static void fill_node_distance_histogram_set_(Sample const &sample, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, NodeDistanceHistogramSet &histogram_set)
Local helper function to fill the placements of a Sample into Histograms.
std::vector< NodeDistanceHistogram > histograms
utils::Matrix< signed char > node_root_direction_matrix(Tree const &tree)
Calculate a Matrix that indicates the nodes on the root side of a given node.
Provides functions for working with Placements and Pqueries.
size_t node_count(Tree const &tree)
Return the number of Nodes of a Tree. Same as Tree::node_count().
double sum(const Histogram &h)
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Collection of NodeDistanceHistograms that describes one Sample.
Class for representing phylogenetic trees.
size_t triangular_size(size_t n)
Calculate the number of linear indices needed for a triangular Matrix of size n.
Store a set of Samples with associated names.
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
static NodeDistanceHistogramSet make_empty_node_distance_histogram_set_(tree::Tree const &tree, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, size_t const histogram_bins)
Local helper function to create a set of Histograms without any weights for a given Tree...
size_t total_placement_count(Sample const &smp)
Get the total number of PqueryPlacements in all Pqueries of the given Sample.
Header of PqueryPlain class.
size_t node_count() const
Return the number of TreeNodes of the Tree.
Simple histogram data structure with equal sized bins.
double branch_length
Branch length of the edge.
std::pair< size_t, size_t > triangular_indices(size_t k, size_t n)
Given a linear index in a upper triangular Matrix, find the corresponding Matrix indices.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
NodeDistanceHistogramSet node_distance_histogram_set(Sample const &sample, utils::Matrix< double > const &node_distances, utils::Matrix< signed char > const &node_sides, size_t const histogram_bins)
Calculate the NodeDistanceHistogramSet representing a single Sample, given the necessary matrices of ...
Header of Tree distance methods.
utils::Matrix< double > node_branch_length_distance_matrix(Tree const &tree)
Return a distance matrix containing pairwise distances between all Nodes, using the branch_length of ...