A toolkit for working with phylogenetic data.
v0.22.1
layout_base.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
42 
43 #include <cassert>
44 #include <stdexcept>
45 
46 namespace genesis {
47 namespace tree {
48 
49 // =================================================================================================
50 // Tree
51 // =================================================================================================
52 
53 void LayoutBase::tree( Tree const& orig_tree, bool ladderize_tree )
54 {
55  // We first copy the tree, then ladderize it before init, so that all positions
56  // are initialized correlty. This is possible because ladderize() only changes link pointers
57  // of the tree, but not and indices or node array positions.
58  tree_ = orig_tree.clone_topology();
59  if( ladderize_tree ) {
60  ladderize( tree_ );
61  }
62  init_tree_( orig_tree );
63 }
64 
66 {
67  return tree_;
68 }
69 
70 Tree const& LayoutBase::tree() const
71 {
72  return tree_;
73 }
74 
75 // =================================================================================================
76 // Edge Strokes
77 // =================================================================================================
78 
80 {
82  set_edge_distance_strokes( stroke );
83 }
84 
85 void LayoutBase::set_edge_strokes( std::vector< utils::SvgStroke > const& strokes )
86 {
87  set_edge_spreading_strokes( strokes );
88  set_edge_distance_strokes( strokes );
89 }
90 
92 {
93  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
94  tree_.edge_at(i).data<LayoutEdgeData>().spreading_stroke = stroke;
95  }
96 }
97 
98 void LayoutBase::set_edge_spreading_strokes( std::vector< utils::SvgStroke > const& strokes )
99 {
100  // Empty: Reset to default.
101  if( strokes.empty() ) {
103  return;
104  }
105 
106  // Non-empty case.
107  if( strokes.size() != tree_.edge_count() ) {
108  throw std::runtime_error( "Edge stroke vector has wrong size." );
109  }
110  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
111  tree_.edge_at(i).data<LayoutEdgeData>().spreading_stroke = strokes[ i ];
112  }
113 }
114 
116 {
117  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
118  tree_.edge_at(i).data<LayoutEdgeData>().distance_stroke = stroke;
119  }
120 }
121 
122 void LayoutBase::set_edge_distance_strokes( std::vector< utils::SvgStroke > const& strokes )
123 {
124  // Empty: Reset to default.
125  if( strokes.empty() ) {
127  return;
128  }
129 
130  // Non-empty case.
131  if( strokes.size() != tree_.edge_count() ) {
132  throw std::runtime_error( "Edge stroke vector has wrong size." );
133  }
134  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
135  tree_.edge_at(i).data<LayoutEdgeData>().distance_stroke = strokes[ i ];
136  }
137 }
138 
140 {
141  for( size_t i = 0; i < tree_.node_count(); ++i ) {
142  auto& node = tree_.node_at(i);
143 
144  bool set_stroke = ( spreading == LayoutSpreading::kAllNodes );
145  set_stroke |= (( spreading == LayoutSpreading::kLeafNodesOnly ) && ( is_leaf( node ) ));
146  set_stroke |= (( spreading == LayoutSpreading::kAllNodesButRoot ) && ( ! is_root( node ) ));
147 
148  if( set_stroke ) {
149  node.data<LayoutNodeData>().spacer_stroke = stroke;
150  }
151  }
152 }
153 
154 void LayoutBase::set_label_spacer_strokes( std::vector< utils::SvgStroke > const& strokes )
155 {
156  // Empty: Reset to default.
157  if( strokes.empty() ) {
161  );
162  return;
163  }
164 
165  // Non-empty case. We offer all edges or leaves only.
166  if( strokes.size() == tree_.node_count() ) {
167  // All nodes get a stroke.
168  for( size_t i = 0; i < tree_.node_count(); ++i ) {
169  tree_.node_at(i).data<LayoutNodeData>().spacer_stroke = strokes[ i ];
170  }
171  } else if( strokes.size() == tree_.node_count() -1 ) {
172  // All nodes but the root get a stroke.
173  size_t si = 0;
174  for( size_t i = 0; i < tree_.node_count(); ++i ) {
175  auto& node = tree_.node_at(i);
176  if( ! is_root( node )) {
177  node.data<LayoutNodeData>().spacer_stroke = strokes[ si ];
178  ++si;
179  }
180  }
181  } else if( strokes.size() == leaf_node_count( tree_ )) {
182  // Only leaf nodes get a stroke.
183  size_t si = 0;
184  for( size_t i = 0; i < tree_.node_count(); ++i ) {
185  auto& node = tree_.node_at(i);
186  if( is_leaf( node )) {
187  node.data<LayoutNodeData>().spacer_stroke = strokes[ si ];
188  ++si;
189  }
190  }
191  } else {
192  throw std::runtime_error(
193  "Edge stroke vector has wrong size. Has to be either tree.node_count(), "
194  "tree.node_count() - 1, or leaf_edge_count( tree )."
195  );
196  }
197 }
198 
199 // =================================================================================================
200 // Edge and Node Shapes
201 // =================================================================================================
202 
204 {
205  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
206  tree_.edge_at(i).data<LayoutEdgeData>().shape = shape;
207  }
208 }
209 
210 void LayoutBase::set_edge_shapes( std::vector< utils::SvgGroup> const& shapes )
211 {
212  // Empty: Reset to default.
213  if( shapes.empty() ) {
215  return;
216  }
217 
218  // Non-empty case.
219  if( shapes.size() != tree_.edge_count() ) {
220  throw std::runtime_error( "Edge shape vector has wrong size." );
221  }
222  for( size_t i = 0; i < tree_.edge_count(); ++i ) {
223  tree_.edge_at(i).data<LayoutEdgeData>().shape = shapes[ i ];
224  }
225 }
226 
228 {
229  for( size_t i = 0; i < tree_.node_count(); ++i ) {
230  tree_.node_at(i).data<LayoutNodeData>().shape = shape;
231  }
232 }
233 
234 void LayoutBase::set_node_shapes( std::vector< utils::SvgGroup> const& shapes )
235 {
236  // Empty: Reset to default.
237  if( shapes.empty() ) {
239  return;
240  }
241 
242  // Non-empty case.
243  if( shapes.size() != tree_.node_count() ) {
244  throw std::runtime_error( "Node shape vector has wrong size." );
245  }
246  for( size_t i = 0; i < tree_.node_count(); ++i ) {
247  tree_.node_at(i).data<LayoutNodeData>().shape = shapes[ i ];
248  }
249 }
250 
251 // =================================================================================================
252 // Init
253 // =================================================================================================
254 
255 void LayoutBase::init_tree_( Tree const& orig_tree )
256 {
257  // Init nodes.
258  for( size_t i = 0; i < tree().node_count(); ++i ) {
259  auto& node = tree().node_at(i);
260  auto const& orig_node = orig_tree.node_at(i);
261 
262  // Safety: correct indices.
263  assert( node.index() == i && orig_node.index() == i );
264 
265  // Set the tree node data.
267 
268  // If the original tree has node names, use them.
269  auto orig_node_data_ptr = orig_node.data_cast<CommonNodeData>();
270  if( orig_node_data_ptr ) {
271  node.data<CommonNodeData>().name = orig_node_data_ptr->name;
272  }
273  }
274 
275  // Init edges.
276  for( size_t i = 0; i < tree().edge_count(); ++i ) {
277  auto& edge = tree().edge_at(i);
278  auto const& orig_edge = orig_tree.edge_at(i);
279 
280  // Safety: correct indices.
281  assert( edge.index() == i && orig_edge.index() == i );
282 
283  // Set the tree edge data.
285 
286  // If the original tree has edge branch lengths, use them.
287  auto orig_edge_data_ptr = orig_edge.data_cast<CommonEdgeData>();
288  if( orig_edge_data_ptr ) {
289  edge.data<CommonEdgeData>().branch_length = orig_edge_data_ptr->branch_length;
290  }
291  }
292 
293  // Layout
294  init_layout_();
295 }
296 
297 void LayoutBase::init_layout_()
298 {
299  if( tree().empty() ) {
300  return;
301  }
302 
303  // Set node parent indices.
304  size_t parent = tree_.root_node().index();
305  for( auto it : eulertour( tree() )) {
306  auto& node_data = tree().node_at( it.node().index() ).data<LayoutNodeData>();
307  if( node_data.parent_index == -1 ) {
308  node_data.parent_index = parent;
309  }
310  parent = it.node().index();
311  }
312 
313  // Set distances of nodes.
314  if( type() == LayoutType::kCladogram ) {
315  set_node_distances_cladogram_();
316  } else if( type() == LayoutType::kPhylogram ) {
317  set_node_distances_phylogram_();
318  } else {
319  throw std::runtime_error( "Invalid LayoutType value." );
320  }
321 
322  // Set spreadings of nodes.
323  if( inner_node_spreading_ == LayoutSpreading::kLeafNodesOnly ) {
324  set_node_spreadings_leaves_();
325  } else {
326  set_node_spreadings_all_( inner_node_spreading_ );
327  }
328 }
329 
330 void LayoutBase::set_node_spreadings_leaves_()
331 {
332  // Maximum number of nodes that we spread. We are actually spreading intervals between nodes,
333  // and not nodes themselves, so we need to subtract 1.
334  auto const num_leaves = leaf_node_count( tree() ) - 1;
335 
336  // Set spreading of leaves.
337  size_t leaf_count = 0;
338  for( auto it : eulertour( tree() )) {
339  auto& node_data = tree().node_at( it.node().index() ).data<LayoutNodeData>();
340  if( is_leaf( it.node() )) {
341  auto const lcd = static_cast<double>( leaf_count );
342  auto const nld = static_cast<double>( num_leaves );
343  node_data.spreading = lcd / nld;
344  leaf_count++;
345  }
346  }
347  assert( leaf_count == num_leaves + 1 );
348 
349  // Min and max spreading of the children of each node.
350  // Init to -1.0 so that we can check which ones are done already.
351  auto chrn_min_s = std::vector<double>( tree().node_count(), -1.0 );
352  auto chrn_max_s = std::vector<double>( tree().node_count(), -1.0 );
353 
354  // Set remaining spreading of inner nodes to mid-points of their children.
355  for( auto it : postorder( tree() )) {
356  auto const node_index = it.node().index();
357  auto& node_data = tree().node_at( node_index ).data<LayoutNodeData>();
358  auto const prnt_index = node_data.parent_index;
359 
360  if( node_data.spreading < 0.0 ) {
361  // We already have done the following nodes because of the postorder.
362  assert( chrn_min_s[ node_index ] > -1.0 );
363  assert( chrn_max_s[ node_index ] > -1.0 );
364 
365  auto min_max_diff = chrn_max_s[ node_index ] - chrn_min_s[ node_index ];
366  node_data.spreading = chrn_min_s[ node_index ] + min_max_diff / 2.0;
367  }
368 
369  if( chrn_min_s[ prnt_index ] < 0.0 || chrn_min_s[ prnt_index ] > node_data.spreading ) {
370  chrn_min_s[ prnt_index ] = node_data.spreading;
371  }
372  if( chrn_max_s[ prnt_index ] < 0.0 || chrn_max_s[ prnt_index ] < node_data.spreading ) {
373  chrn_max_s[ prnt_index ] = node_data.spreading;
374  }
375  }
376 }
377 
378 void LayoutBase::set_node_spreadings_all_( LayoutSpreading spreading )
379 {
380  assert( ! tree_.empty() );
381  if( ! is_bifurcating( tree_ )) {
382  throw std::invalid_argument(
383  "Tree is not bifurcating. Cannot draw with inner node spreading."
384  );
385  }
386  // We now allow unrooted tree (top level trifurcation) as well ;-)
387  // if( ! is_rooted( tree_ )) {
388  // throw std::invalid_argument(
389  // "Tree is not rooted. Cannot draw with inner node spreading."
390  // );
391  // }
392 
393  // Maximum number of nodes that we spread. We are actually spreading intervals between nodes,
394  // and not nodes themselves, so we need to subtract 1.
395  auto const num_nodes = tree().node_count() - 1 - (
396  spreading == LayoutSpreading::kAllNodesButRoot ? 1 : 0
397  );
398  auto visits = std::vector<size_t>( tree_.node_count(), 0 );
399  size_t node_counter = 0;
400 
401  for( auto it : eulertour( tree_ )) {
402  auto& node_data = it.node().data<LayoutNodeData>();
403  auto const node_index = it.node().index();
404 
405  // Count the how many-th visit this is. As we have a bifurcating tree,
406  // it can never surpass 3 visits.
407  ++visits[ node_index ];
408  assert( visits[ node_index ] <= 3 );
409 
410  if( spreading == LayoutSpreading::kAllNodesButRoot && is_root( it.node() )) {
411  continue;
412  } else if( is_leaf( it.node() ) || visits[ node_index ] == 2 ) {
413  auto const ncd = static_cast<double>( node_counter );
414  auto const nnd = static_cast<double>( num_nodes );
415  node_data.spreading = ncd / nnd;
416  ++node_counter;
417  }
418  }
419  assert( node_counter == num_nodes + 1 );
420 
421  // Special case for the root if we do not want to spread it:
422  // if the root is bifurcating (actual root), set its spread to the middle.
423  // if it is a virtual root (top level trifurcation), set its spread the the mid node.
424  if( spreading == LayoutSpreading::kAllNodesButRoot ) {
425  auto& root = tree_.root_node();
426  auto& root_data = root.data<LayoutNodeData>();
427  auto const degr = degree( root );
428  // assert( root_data.spreading < 0.0 );
429 
430  if( degr == 2 ) {
431  auto const& l_data = root.link().outer().node().data<LayoutNodeData>();
432  auto const& r_data = root.link().next().outer().node().data<LayoutNodeData>();
433  root_data.spreading = ( l_data.spreading + r_data.spreading ) / 2.0;
434  } else {
435  assert( degr == 3 );
436  auto const& l_data = root.link().outer().node().data<LayoutNodeData>();
437  auto const& r_data = root.link().next().next().outer().node().data<LayoutNodeData>();
438  root_data.spreading = ( l_data.spreading + r_data.spreading ) / 2.0;
439 
440  // auto const& m_data = root.link().next().outer().node().data<LayoutNodeData>();
441  // root_data.spreading = m_data.spreading;
442  }
443  }
444 }
445 
446 void LayoutBase::set_node_distances_phylogram_()
447 {
448  // Get distnace from root to every node.
449  auto node_dists = node_branch_length_distance_vector( tree() );
450 
451  // We already check the size in init_layout_()
452  assert( ! node_dists.empty() );
453  auto const max_d = *std::max_element( node_dists.begin(), node_dists.end() );
454 
455  for( size_t i = 0; i < node_dists.size(); ++i ) {
456  tree().node_at(i).data<LayoutNodeData>().distance = node_dists[i] / max_d;
457  }
458 }
459 
460 void LayoutBase::set_node_distances_cladogram_()
461 {
462  // Set root distance to 0.
463  tree().root_node().data<LayoutNodeData>().distance = 0.0;
464 
465  // Get the heights of all subtrees starting from the root.
466  auto const heights = subtree_max_path_heights( tree() );
467 
468  // Get the height of the tree, i.e. longest path from root to any leaf.
469  auto const root_height = static_cast<double>( heights[ tree().root_node().index() ] );
470 
471  for( auto it : preorder( tree() )) {
472  // The subtree height calculation does not work for the root, so skip it.
473  // Also, we already set the leaf.
474  if( it.is_first_iteration() ) {
475  continue;
476  }
477 
478  // Get the height of the subtree starting at the current node.
479  auto const height = static_cast<double>( heights[ it.node().index() ]);
480  assert( height <= root_height );
481 
482  // Set the distance.
483  auto const distance = ( root_height - height ) / root_height;
484  tree().node_at( it.node().index() ).data<LayoutNodeData>().distance = distance;
485  }
486 }
487 
488 // =================================================================================================
489 // Options
490 // =================================================================================================
491 
492 void LayoutBase::type( LayoutType const drawing_type )
493 {
494  type_ = drawing_type;
495  if( ! tree_.empty() ) {
496  init_layout_();
497  }
498 }
499 
501 {
502  return type_;
503 }
504 
506 {
507  inner_node_spreading_ = value;
508  if( ! tree_.empty() ) {
509  init_layout_();
510  }
511 }
512 
514 {
515  return inner_node_spreading_;
516 }
517 
518 void LayoutBase::align_labels( bool value )
519 {
520  align_labels_ = value;
521 }
522 
524 {
525  return align_labels_;
526 }
527 
528 void LayoutBase::extra_spacer( double value )
529 {
530  extra_spacer_ = value;
531 }
532 
534 {
535  return extra_spacer_;
536 }
537 
539 {
540  text_template_ = tt;
541 }
542 
544 {
545  return text_template_;
546 }
547 
549 {
550  return text_template_;
551 }
552 
553 } // namespace tree
554 } // namespace genesis
void set_node_shapes(utils::SvgGroup const &shape)
void ladderize(Tree &tree, LadderizeOrder order)
Ladderize a Tree, that is, order its subtrees by size.
LayoutType type() const
bool is_root(TreeLink const &link)
Return whether the link belongs to the root node of its Tree.
static std::unique_ptr< LayoutNodeData > create()
Header of CommonTree distance methods.
double extra_spacer() const
Tree operator functions.
bool empty() const
Return whether the Tree is empty (i.e., has no nodes, edges and links).
Definition: tree/tree.hpp:194
static std::unique_ptr< LayoutEdgeData > create()
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.hpp:272
bool is_bifurcating(Tree const &tree, bool loose)
Return whether the Tree is bifurcating.
TreeEdge & reset_data(std::unique_ptr< BaseEdgeData > data)
Reset the data pointer of this TreeEdge.
Definition: edge.hpp:284
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
LayoutSpreading inner_node_spreading() const
utils::Range< IteratorEulertour< true > > eulertour(ElementType const &element)
Definition: eulertour.hpp:206
void set_edge_shapes(utils::SvgGroup const &shape)
Data class for LayoutTreeEdges.
utils::Range< IteratorPostorder< true > > postorder(ElementType const &element)
Tree clone_topology() const
Return a Tree with the same topology, but without any data.
Definition: tree/tree.cpp:109
void set_edge_distance_strokes(utils::SvgStroke const &stroke)
size_t leaf_node_count(Tree const &tree)
Count the number of leaf Nodes of a Tree.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
std::vector< double > node_branch_length_distance_vector(Tree const &tree, TreeNode const *node)
Return a vector containing the distance of all nodes with respect to the given start node...
double height(Tree const &tree)
Get the height of the tree, i.e., the longest distance from the root to a leaf, measured using the br...
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.hpp:220
Tree const & tree() const
Definition: layout_base.cpp:70
std::vector< size_t > subtree_max_path_heights(Tree const &tree, TreeNode const &node)
void set_label_spacer_strokes(utils::SvgStroke const &stroke, LayoutSpreading spreading=LayoutSpreading::kLeafNodesOnly)
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.hpp:238
std::string name
Name of the node.
LayoutSpreading
Spreading of the nodes of a tree for drawing.
Definition: layout_base.hpp:80
void set_edge_strokes(utils::SvgStroke const &stroke)
Definition: layout_base.cpp:79
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.hpp:264
double branch_length
Branch length of the edge.
utils::Range< IteratorPreorder< true > > preorder(ElementType const &element)
LayoutType
Type of tree for drawing, either phylogram or cladogram.
Definition: layout_base.hpp:62
double spreading
Position of the node along the second axis.
Common class containing the commonly needed data for tree nodes.
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.hpp:300
NodeDataType & data()
Definition: node.hpp:169
Data class for LayoutTreeNodes.
Definition: layout_tree.hpp:80
Header of Tree distance methods.
size_t index() const
Return the index of this Node.
Definition: node.hpp:101
TreeNode & reset_data(std::unique_ptr< BaseNodeData > data)
Reset the data pointer of this TreeNode.
Definition: node.hpp:256
Common class containing the commonly needed data for tree edges.
size_t degree(TreeLink const &link)
Return the degree of the node for a given TreeLink, i.e. how many neighbouring nodes it has...
EdgeDataType & data()
Definition: edge.hpp:183
void set_edge_spreading_strokes(utils::SvgStroke const &stroke)
Definition: layout_base.cpp:91
EdgeDataType * data_cast()
Definition: edge.hpp:195
bool is_leaf(TreeLink const &link)
Return true iff the node of the given link is a leaf node.
utils::SvgText & text_template()
NodeDataType * data_cast()
Definition: node.hpp:181