A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
function/sample_set.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
38 #include <ostream>
39 
40 namespace genesis {
41 namespace placement {
42 
43 // =================================================================================================
44 // Sample Set Functions
45 // =================================================================================================
46 
47 Sample* find_sample( SampleSet& sample_set, std::string const& name )
48 {
49  for (auto& nsmp : sample_set) {
50  if( nsmp.name == name ) {
51  return &nsmp.sample;
52  }
53  }
54  return nullptr;
55 }
56 
57 Sample const* find_sample( SampleSet const& sample_set, std::string const& name )
58 {
59  for (auto& nsmp : sample_set) {
60  if( nsmp.name == name ) {
61  return &nsmp.sample;
62  }
63  }
64  return nullptr;
65 }
66 
67 Sample merge_all( SampleSet const& sample_set )
68 {
69  // The following operations do a lot of traversals on all trees: first some for the
70  // average_branch_length_tree, then for the merging again. This could be turned into
71  // less traversals by copying code and doing all in one run. However, at the current point, this
72  // method will be called once in the beginning of a program run, and thus it is not necessary to
73  // optimize for speed. Instead, we opt for clean, separated and easy code here.
74 
75  if( sample_set.size() == 0 ) {
76  return Sample();
77  }
78 
79  // Create a new Sample and initialize it with the average branch length tree of all
80  // maps in this set, but without any placements.
81  auto res = Sample( average_branch_length_tree( sample_set ));
82 
83  // Copy the rest of the data from the first tree to the averaged tree.
84  // This is necessary, because the tree copy constructor does not do this for us.
85  // TODO fix this!
86  for (size_t i = 0; i < res.tree().node_count(); ++i) {
87  res.tree().node_at(i).data<PlacementNodeData>().name
88  = sample_set[0].sample.tree().node_at(i).data<PlacementNodeData>().name;
89  }
90  for (size_t i = 0; i < res.tree().edge_count(); ++i) {
91  res.tree().edge_at(i).data<PlacementEdgeData>().reset_edge_num(
92  sample_set[0].sample.tree().edge_at(i).data<PlacementEdgeData>().edge_num()
93  );
94  }
95 
96  // Add the placements from all maps of this set.
97  // In the merge method, we also check for identical topology (again), but mainly for identical
98  // taxa names and edge_nums, which is important for correct merging.
99  for (auto& smp : sample_set) {
100  copy_pqueries( smp.sample, res );
101  }
102 
103  return res;
104 }
105 
106 size_t total_pquery_count( SampleSet const& sample_set )
107 {
108  size_t s = 0;
109  for( auto const& sample : sample_set ) {
110  s += sample.sample.size();
111  }
112  return s;
113 }
114 
115 // =================================================================================================
116 // Tree Functions
117 // =================================================================================================
118 
120 {
121  return average_branch_length_tree( tree_set( sample_set ));
122 }
123 
124 bool all_identical_trees( SampleSet const& sample_set )
125 {
126  auto node_comparator = [] (
127  PlacementTreeNode const& node_l,
128  PlacementTreeNode const& node_r
129  ) {
130  auto l_ptr = dynamic_cast< PlacementNodeData const* >( node_l.data_ptr() );
131  auto r_ptr = dynamic_cast< PlacementNodeData const* >( node_r.data_ptr() );
132  if( l_ptr == nullptr || r_ptr == nullptr ) {
133  return false;
134  }
135  return l_ptr->name == r_ptr->name &&
136  node_l.index() == node_r.index();
137  };
138 
139  auto edge_comparator = [] (
140  PlacementTreeEdge const& edge_l,
141  PlacementTreeEdge const& edge_r
142  ) {
143  auto l_ptr = dynamic_cast< PlacementEdgeData const* >( edge_l.data_ptr() );
144  auto r_ptr = dynamic_cast< PlacementEdgeData const* >( edge_r.data_ptr() );
145  if( l_ptr == nullptr || r_ptr == nullptr ) {
146  return false;
147  }
148  return l_ptr->edge_num() == r_ptr->edge_num() &&
149  edge_l.primary_node().index() == edge_r.primary_node().index() &&
150  edge_l.secondary_node().index() == edge_r.secondary_node().index();
151  };
152 
153  return all_equal( tree_set( sample_set ), node_comparator, edge_comparator );
154 }
155 
156 tree::TreeSet tree_set( SampleSet const& sample_set )
157 {
158  tree::TreeSet tset;
159  for( auto const& smp : sample_set ) {
160  tset.add( smp.name, smp.sample.tree() );
161  }
162  return tset;
163 }
164 
165 void adjust_branch_lengths( SampleSet& sample_set, tree::Tree const& source )
166 {
167  for( auto& smp : sample_set ) {
168  adjust_branch_lengths( smp.sample, source );
169  }
170 }
171 
173 {
174  adjust_branch_lengths( sample_set, average_branch_length_tree( sample_set ));
175 }
176 
177 // =================================================================================================
178 // Output
179 // =================================================================================================
180 
181 std::ostream& operator << ( std::ostream& out, SampleSet const& sample_set )
182 {
183  // TODO this was meant for full output. turn it into a printer instead!
184  bool full = false;
185 
186  size_t i = 0;
187  for( auto const& cm : sample_set ) {
188  out << std::to_string(i) << ": " << cm.name << "\n";
189  if (full) {
190  out << cm.sample << "\n";
191  }
192  ++i;
193  }
194  return out;
195 }
196 
197 } // namespace placement
198 } // namespace genesis
size_t total_pquery_count(SampleSet const &sample_set)
Return the total number of Pqueries in the Samples of the SampleSet.
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
Sample merge_all(SampleSet const &sample_set)
Returns a Sample into which all Samples of a SampleSet have been merged.
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.cpp:128
Provides functions for working with Placements and Pqueries.
bool all_identical_trees(SampleSet const &sample_set)
Returns true iff all Trees of the Samples in the set are identical.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:300
size_t index() const
Return the index of this Node.
Definition: node.cpp:48
void copy_pqueries(Sample const &source, Sample &target)
Copy all Pqueries from the source Sample (left parameter) to the target Sample (right parameter)...
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
void adjust_to_average_branch_lengths(SampleSet &sample_set)
Set the branch lengths of all Samples in the sample_set to the respective average branch length of th...
Store a set of Samples with associated names.
Definition: sample_set.hpp:52
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.cpp:101
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.cpp:105
tree::Tree average_branch_length_tree(SampleSet const &sample_set)
Return the Tree that has edges with the average branch length of the respective edges of the Trees in...
void adjust_branch_lengths(Sample &sample, tree::Tree const &source)
Take the branch lengths of the source Tree and use them as the new branch lengths of the sample...
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.cpp:135
Data class for PlacementTreeNodes. Stores a node name.
Sample * find_sample(SampleSet &sample_set, std::string const &name)
Get the first Sample in a SampleSet that has a given name, or nullptr if not found.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
std::string name
Name of the node.
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.cpp:85
std::ostream & operator<<(std::ostream &out, Sample const &smp)
Print a table of all Pqueries with their Placements and Names to the stream.
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
bool all_equal(TreeSet const &tset, std::function< bool(TreeNode const &, TreeNode const &)> node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &)> edge_comparator)
Compare whether all Trees in a TreeSet are equal using a given comparator functional.
int edge_num() const
Return the edge_num of this edge. This value is defined by the jplace standard.