A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sample_set.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
39 #include <ostream>
40 
41 namespace genesis {
42 namespace placement {
43 
44 // =================================================================================================
45 // Sample Set Functions
46 // =================================================================================================
47 
48 Sample* find_sample( SampleSet& sample_set, std::string const& name )
49 {
50  for (auto& nsmp : sample_set) {
51  if( nsmp.name == name ) {
52  return &nsmp.sample;
53  }
54  }
55  return nullptr;
56 }
57 
58 Sample const* find_sample( SampleSet const& sample_set, std::string const& name )
59 {
60  for (auto& nsmp : sample_set) {
61  if( nsmp.name == name ) {
62  return &nsmp.sample;
63  }
64  }
65  return nullptr;
66 }
67 
68 Sample merge_all( SampleSet const& sample_set )
69 {
70  // The following operations do a lot of traversals on all trees: first some for the
71  // average_branch_length_tree, then for the merging again. This could be turned into
72  // less traversals by copying code and doing all in one run. However, at the current point, this
73  // method will be called once in the beginning of a program run, and thus it is not necessary to
74  // optimize for speed. Instead, we opt for clean, separated and easy code here.
75 
76  if( sample_set.size() == 0 ) {
77  return Sample();
78  }
79 
80  // Create a new Sample and initialize it with the average branch length tree of all
81  // maps in this set, but without any placements.
82  auto res = Sample( average_branch_length_tree( sample_set ));
83 
84  // Copy the rest of the data from the first tree to the averaged tree.
85  // This is necessary, because the tree copy constructor does not do this for us.
86  // TODO fix this!
87  for (size_t i = 0; i < res.tree().node_count(); ++i) {
88  res.tree().node_at(i).data<PlacementNodeData>().name
89  = sample_set[0].sample.tree().node_at(i).data<PlacementNodeData>().name;
90  }
91  for (size_t i = 0; i < res.tree().edge_count(); ++i) {
92  res.tree().edge_at(i).data<PlacementEdgeData>().reset_edge_num(
93  sample_set[0].sample.tree().edge_at(i).data<PlacementEdgeData>().edge_num()
94  );
95  }
96 
97  // Add the placements from all maps of this set.
98  // In the merge method, we also check for identical topology (again), but mainly for identical
99  // taxa names and edge_nums, which is important for correct merging.
100  for (auto& smp : sample_set) {
101  copy_pqueries( smp.sample, res );
102  }
103 
104  return res;
105 }
106 
107 size_t total_pquery_count( SampleSet const& sample_set )
108 {
109  size_t s = 0;
110  for( auto const& sample : sample_set ) {
111  s += sample.sample.size();
112  }
113  return s;
114 }
115 
116 // =================================================================================================
117 // Tree Functions
118 // =================================================================================================
119 
121 {
122  return average_branch_length_tree( tree_set( sample_set ));
123 }
124 
125 bool all_identical_trees( SampleSet const& sample_set )
126 {
127  auto node_comparator = [] (
128  PlacementTreeNode const& node_l,
129  PlacementTreeNode const& node_r
130  ) {
131  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
132  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
133  if( l_ptr == nullptr || r_ptr == nullptr ) {
134  return false;
135  }
136  return l_ptr->name == r_ptr->name &&
137  node_l.index() == node_r.index();
138  };
139 
140  auto edge_comparator = [] (
141  PlacementTreeEdge const& edge_l,
142  PlacementTreeEdge const& edge_r
143  ) {
144  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
145  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
146  if( l_ptr == nullptr || r_ptr == nullptr ) {
147  return false;
148  }
149  return l_ptr->edge_num() == r_ptr->edge_num() &&
150  edge_l.primary_node().index() == edge_r.primary_node().index() &&
151  edge_l.secondary_node().index() == edge_r.secondary_node().index();
152  };
153 
154  return all_equal( tree_set( sample_set ), node_comparator, edge_comparator );
155 }
156 
157 tree::TreeSet tree_set( SampleSet const& sample_set )
158 {
159  tree::TreeSet tset;
160  for( auto const& smp : sample_set ) {
161  tset.add( smp.name, smp.sample.tree() );
162  }
163  return tset;
164 }
165 
166 void adjust_branch_lengths( SampleSet& sample_set, tree::Tree const& source )
167 {
168  for( auto& smp : sample_set ) {
169  adjust_branch_lengths( smp.sample, source );
170  }
171 }
172 
174 {
175  adjust_branch_lengths( sample_set, average_branch_length_tree( sample_set ));
176 }
177 
178 // =================================================================================================
179 // Output
180 // =================================================================================================
181 
182 std::ostream& operator << ( std::ostream& out, SampleSet const& sample_set )
183 {
184  // TODO this was meant for full output. turn it into a printer instead!
185  bool full = false;
186 
187  size_t i = 0;
188  for( auto const& cm : sample_set ) {
189  out << std::to_string(i) << ": " << cm.name << "\n";
190  if (full) {
191  out << cm.sample << "\n";
192  }
193  ++i;
194  }
195  return out;
196 }
197 
198 } // namespace placement
199 } // namespace genesis
size_t total_pquery_count(SampleSet const &sample_set)
Return the total number of Pqueries in the Samples of the SampleSet.
Definition: sample_set.cpp:107
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
Sample merge_all(SampleSet const &sample_set)
Returns a Sample where all Samples of a SampleSet have been merged into.
Definition: sample_set.cpp:68
Provides functions for working with Placements and Pqueries.
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:161
bool all_identical_trees(SampleSet const &sample_set)
Returns true iff all Trees of the Samples in the set are identical.
Definition: sample_set.cpp:125
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:381
size_t index() const
Return the index of this Node.
Definition: node.hpp:100
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:145
void copy_pqueries(Sample const &source, Sample &target)
Copy all Pqueries from the source Sample (left parameter) to the target Sample (right parameter)...
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:212
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
void adjust_to_average_branch_lengths(SampleSet &sample_set)
Set the branch lengths of all Samples in the sample_set to the respecitve average branch length of th...
Definition: sample_set.cpp:173
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
tree::Tree average_branch_length_tree(SampleSet const &sample_set)
Return the Tree that has edges with the average branch length of the respective edges of the Trees in...
Definition: sample_set.cpp:120
void adjust_branch_lengths(Sample &sample, tree::Tree const &source)
Take the branch lengths of the source Tree and use them as the new branch lengths of the sample...
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.hpp:234
Data class for PlacementTreeNodes. Stores a node name.
Default Tree functions.
Sample * find_sample(SampleSet &sample_set, std::string const &name)
Get the first Sample in a SampleSet that has a given name, or nullptr if not found.
Definition: sample_set.cpp:48
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:157
std::string name
Name of the node.
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.hpp:181
bool all_equal(TreeSet const &tset, std::function< bool(TreeNode const &, TreeNode const &)> node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &)> edge_comparator)
Compare whether all Trees in a TreeSet are equal using a given comparator functional.
int edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.