57 std::unordered_map<std::string, size_t> names;
61 for(
auto const& node : tree.
nodes() ) {
67 throw std::runtime_error(
68 "Cannot calculate RF distnace with empty leaf node names."
71 if( names.count( name ) > 0 ) {
72 throw std::runtime_error(
73 "Cannot calculate RF distance with tree that has duplicate node names. "
74 "Name '" + name +
"' appears multiple times."
79 auto const idx = names.size();
109 template<
typename BitvectorProcessor>
112 std::unordered_map<std::string, size_t>
const& names,
113 BitvectorProcessor
const& process_bitvector
119 auto bipartitions = std::vector<Bitvector>( tree.
edge_count() );
122 auto name_check = Bitvector( names.size() );
131 size_t root_skip = std::numeric_limits<size_t>::max();
141 if( it.is_last_iteration() ) {
146 if( it.node().index() == root_skip ) {
155 auto const nit = names.find( name );
156 if( nit == names.end() ) {
157 throw std::runtime_error(
158 "Cannot calculate RF distance with inconsistent node names. "
159 "Name '" + name +
"' is missing from a tree."
164 if( name_check[ nit->second ] ) {
165 throw std::runtime_error(
166 "Cannot calculate RF distance with tree that has duplicate node names. "
167 "Name '"+ name +
"' appears multiple times."
170 name_check.set( nit->second );
175 auto const eidx = it.edge().index();
176 bipartitions[ eidx ] = Bitvector( names.size() );
177 bipartitions[ eidx ].set( nit->second );
181 auto current = Bitvector( names.size() );
191 auto l = &it.link().next();
192 while( l != &it.link() ) {
193 current |= bipartitions[ l->edge().index() ];
200 bipartitions[ it.edge().index() ] = current;
205 process_bitvector( current );
211 if( name_check.count() != names.size() ) {
212 throw std::runtime_error(
213 "Cannot calculate RF distance with trees that have different node names. "
214 "Some names are missing from one of the trees."
221 std::unordered_map<std::string, size_t>
const& names
226 std::vector<Bitvector> result;
231 result.push_back( bitvec );
247 auto result = std::unordered_map<utils::Bitvector, utils::Bitvector>();
250 if( trees.
empty() ) {
257 #pragma omp parallel for
258 for(
size_t i = 0; i < trees.
size(); ++i ) {
264 #pragma omp critical(GENESIS_BIPARTITION_OCCURRENCES)
266 if( result.count( bitvec ) == 0 ) {
267 result[bitvec] = Bitvector( trees.size() );
269 result[bitvec].set( i );
285 auto result = std::unordered_map<utils::Bitvector, utils::Bitvector>();
298 assert( result.count( bitvec ) == 0 );
299 result[bitvec] = Bitvector( 1 + rhs.
size() );
300 result[bitvec].set( 0 );
304 #pragma omp parallel for
305 for(
size_t i = 0; i < rhs.
size(); ++i ) {
308 #pragma omp critical(GENESIS_BIPARTITION_OCCURRENCES)
311 if( result.count( bitvec ) == 0 ) {
312 result[bitvec] = utils::Bitvector( 1 + rhs.size() );
314 result[bitvec].set( 1 + i );
330 auto result = std::unordered_map<utils::Bitvector, utils::Bitvector>();
338 assert( result.count( bitvec ) == 0 );
339 result[bitvec] = Bitvector( 2 );
340 result[bitvec].set( 0 );
346 if( result.count( bitvec ) == 0 ) {
347 result[bitvec] = utils::Bitvector( 2 );
349 result[bitvec].set( 1 );
363 auto result = Matrix<size_t>( trees.
size(), trees.
size(), 0 );
367 for(
auto const& hash_occ : hash_occs ) {
370 for(
size_t i = 0; i < trees.
size(); ++i ) {
371 if( hash_occ.second[i] ==
false ) {
379 for(
size_t j = 0; j < trees.
size(); ++j ) {
383 if( hash_occ.second[j] ==
false ) {
396 auto result = std::vector<size_t>( rhs.
size(), 0 );
400 for(
auto const& hash_occ : hash_occs ) {
402 bool const in_lhs = hash_occ.second[0];
405 for(
size_t i = 0; i < rhs.
size(); ++i ) {
406 bool const in_rhs = hash_occ.second[ 1 + i ];
413 result[i] += (in_lhs ^ in_rhs);
434 for(
auto const& hash_occ : hash_occs ) {
435 assert( hash_occ.second.size() == 2 );
438 bool const in_lhs = hash_occ.second[ 0 ];
439 bool const in_rhs = hash_occ.second[ 1 ];
443 assert( in_lhs || in_rhs );
450 result += (in_lhs ^ in_rhs);
464 if( trees.
size() == 0 ) {
470 assert( rf.rows() == trees.
size() );
471 assert( rf.cols() == trees.
size() );
474 assert( trees.
size() > 0 );
476 throw std::runtime_error(
477 "Cannot compute relative RF distance for trees with less than 3 taxa."
480 double const norm = 2.0 *
static_cast<double>( trees[0].node_count() - 3 );
483 #pragma omp parallel for
484 for(
size_t i = 0; i < rf.rows(); ++i ) {
485 for(
size_t j = 0; j < rf.cols(); ++j ) {
486 result( i, j ) =
static_cast<double>( rf( i, j )) / norm;
496 auto result = std::vector<double>( rhs.
size() );
500 assert( rf.size() == rhs.
size() );
504 throw std::runtime_error(
505 "Cannot compute relative RF distance for trees with less than 3 taxa."
508 double const norm = 2.0 *
static_cast<double>( lhs.
node_count() - 3 );
511 #pragma omp parallel for
512 for(
size_t i = 0; i < rf.size(); ++i ) {
513 result[i] =
static_cast<double>( rf[i] ) / norm;
526 throw std::runtime_error(
527 "Cannot compute relative RF distance for trees with less than 3 taxa."
530 double const norm = 2.0 *
static_cast<double>( lhs.
node_count() - 3 );
532 return static_cast<double>( rf ) / norm;