A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
serializer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
39 #include <stdexcept>
40 
41 namespace genesis {
42 namespace placement {
43 
44 // =================================================================================================
45 // Version
46 // =================================================================================================
47 
52 unsigned char SampleSerializer::version = 1;
53 
54 // =================================================================================================
55 // Save
56 // =================================================================================================
57 
61 void SampleSerializer::save( Sample const& map, std::string const& file_name )
62 {
63  // Prepare.
64  utils::Serializer ser (file_name);
65  if (!ser) {
66  throw std::invalid_argument("Serialization failed.");
67  }
68 
69  // Write header.
70  char magic[] = "BPLACE\0\0";
71  ser.put_raw(magic, 8);
72  ser.put_int<unsigned char>(version);
73 
74  // Write tree.
75  // TODO if there is a tree serialization in the future, this one could be used here, and in
76  // addition to edge numbers, the edge indices can be stored, so that deserialization is easier.
77  auto nw = PlacementTreeNewickWriter();
78  nw.enable_names(true);
79  nw.enable_branch_lengths(true);
80  ser.put_string(nw.to_string(map.tree()));
81 
82  // Write pqueries.
83  ser.put_int(map.size());
84  for (auto& pqry : map.pqueries()) {
85 
86  // Write placements.
87  ser.put_int(pqry.placement_size());
88  for( auto const& place : pqry.placements() ) {
89  // We set the edge index instead of edge num. This is faster, simpler to resorte, and
90  // consinstend with Pquery.add_placement() parameters.
91  ser.put_int (place.edge().index());
92 
93  ser.put_float (place.likelihood);
94  ser.put_float (place.like_weight_ratio);
95  ser.put_float (place.proximal_length);
96  ser.put_float (place.pendant_length);
97  ser.put_int (place.parsimony);
98  }
99 
100  // Write names.
101  ser.put_int(pqry.name_size());
102  for( auto const& name : pqry.names() ) {
103  ser.put_string (name.name);
104  ser.put_float (name.multiplicity);
105  }
106  }
107 }
108 
109 // =================================================================================================
110 // Load
111 // =================================================================================================
112 
116 Sample SampleSerializer::load( std::string const& file_name )
117 {
118  // Create returned object.
119  Sample map;
120 
121  // Prepare, check stream status.
122  utils::Deserializer des( file_name );
123  if( ! des ) {
124  throw std::invalid_argument( "Deserialization failed: Cannot open file." );
125  }
126 
127  // Read and check header.
128  std::string magic = des.get_raw_string(8);
129  if (strncmp (magic.c_str(), "BPLACE\0\0", 8) != 0) {
130  throw std::invalid_argument("Wrong file format: \"" + magic + "\".");
131  }
132  auto ver = des.get_int<unsigned char>();
133  if (ver != version) {
134  throw std::invalid_argument("Wrong serialization version: " + std::to_string(ver));
135  }
136 
137  // Read and check tree.
138  auto tree_string = des.get_string();
139  map.tree() = PlacementTreeNewickReader().from_string( tree_string );
140 
141  // Read pqueries.
142  size_t num_pqueries = des.get_int<size_t>();
143  for (size_t i = 0; i < num_pqueries; ++i) {
144  Pquery& pqry = map.add();
145 
146  // Read placements.
147  size_t num_place = des.get_int<size_t>();
148  for (size_t p = 0; p < num_place; ++p) {
149  // Get edge index, add the placement there.
150  size_t edge_idx = des.get_int<size_t>();
151  auto& edge = map.tree().edge_at( edge_idx );
152  auto& place = pqry.add_placement( edge );
153 
154  place.likelihood = des.get_float<double>();
155  place.like_weight_ratio = des.get_float<double>();
156  place.proximal_length = des.get_float<double>();
157  place.pendant_length = des.get_float<double>();
158  place.parsimony = des.get_int<int>();
159  }
160 
161  // Read names.
162  size_t num_names = des.get_int<size_t>();
163  for (size_t n = 0; n < num_names; ++n) {
164  auto name = pqry.add_name( des.get_string() );
165  name.multiplicity = des.get_float<double>();
166  }
167  }
168 
169  if (!des.finished()) {
170  throw std::invalid_argument("Deserialization failed: File longer than expected.");
171  }
172 
173  return map;
174 }
175 
176 } // namespace placement
177 } // namespace genesis
size_t size() const
Return the number of Pqueries that are stored in this Sample.
Definition: sample.cpp:133
static unsigned char version
Version of this serialization helper. Is written to the stream and read again to make sure that diffe...
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
static Sample load(std::string const &file_name)
Loads a Sample from a binary file that was written by using save().
Definition: serializer.cpp:116
A pquery holds a set of PqueryPlacements and a set of PqueryNames.
Definition: pquery.hpp:82
PqueryName & add_name(std::string name="", double multiplicity=1.0)
Create a new PqueryName using the provided parameters, add it to the Pquery and return it...
Definition: pquery.cpp:252
std::string get_string()
Read a string from the stream, provided that its length is written preceding it, as done by put_string().
Tree from_string(std::string const &tree_string) const
Read a Tree from a string containing a Newick tree.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:300
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries.
Definition: sample.cpp:259
void put_raw(char const *data, size_t n)
Write raw data, provided as a char array of length n, to the stream.
void put_float(const T v)
Write a floating point number to the stream.
Header of Serializer and Deserializer class.
double multiplicity
Multiplicity of the name.
Definition: name.hpp:117
PqueryPlacement & add_placement(PlacementTreeEdge &edge)
Create a new PqueryPlacement at a given PlacementTreeEdge, add it to the Pquery and return it...
Definition: pquery.cpp:113
std::string get_raw_string(size_t n)
Read n bytes from the stream and return them as a string.
Provides easy and fast logging functionality.
void put_string(const std::string &v)
Write a string, preceded by its length, to the stream. Use get_string() to read it.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
void put_int(const T v)
Write an integer number to the stream.
T get_int()
Read an integer number from the stream and return it.
Header of SampleSerializer class.
T get_float()
Read a floating point number from the stream and return it.
static void save(Sample const &map, std::string const &file_name)
Saves the Sample to a binary file that can later be read by using load().
Definition: serializer.cpp:61
Pquery & add()
Create an empty Pquery, add it to the Sample and return it.
Definition: sample.cpp:147
double likelihood
Total likelihood of the tree with this placement attached to it.