A toolkit for working with phylogenetic data.
v0.24.0
jplace_writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <fstream>
34 #include <memory>
35 #include <sstream>
36 #include <stdexcept>
37 #include <string>
38 #include <vector>
39 
44 
46 
57 
58 namespace genesis {
59 namespace placement {
60 
61 // =================================================================================================
62 // Constructor and Rule of Five
63 // =================================================================================================
64 
66 {
67  program_ = "genesis " + genesis_version();
68  invocation_ = utils::Options::get().command_line_string();
69 }
70 
71 // =================================================================================================
72 // Printing
73 // =================================================================================================
74 
75 void JplaceWriter::write( Sample const& sample, std::shared_ptr<utils::BaseOutputTarget> target ) const
76 {
77  // Shorthand.
78  auto& os = target->ostream();
79 
80  // Indent. Might be replaced by some setting for the class in the future.
81  std::string in = " ";
82 
83  // Open json document.
84  os << "{\n";
85 
86  // Write version.
87  os << in << "\"version\": 3,\n";
88 
89  // Write metadata.
90  os << in << "\"metadata\": {\n";
91  os << in << in << "\"program\": \"" << utils::escape( program_ ) << "\",\n";
92  os << in << in << "\"invocation\": \"" << utils::escape( invocation_ ) << "\",\n";
93  os << in << in << "\"created\": \"" << utils::escape( utils::current_date() );
94  os << " " << utils::escape( utils::current_time() ) << "\"\n";
95  os << in << "},\n";
96 
97  // Write tree.
98  auto newick_writer = PlacementTreeNewickWriter();
99  newick_writer.enable_names(true);
100  newick_writer.enable_branch_lengths(true);
101  newick_writer.branch_length_precision( branch_length_precision_ );
102  newick_writer.trailing_new_line( false );
103  os << in << "\"tree\": \"";
104  os << utils::escape( newick_writer.to_string( sample.tree() ));
105  os << "\",\n";
106 
107  // Write field names.
108  os << in << "\"fields\": [ \"edge_num\", \"likelihood\", \"like_weight_ratio\", "
109  << "\"distal_length\", \"pendant_length\" ],\n";
110 
111  // Write pqueries.
112  os << in << "\"placements\": [\n";
113  for( size_t i = 0; i < sample.size(); ++i ) {
114  auto const& pquery = sample.at(i);
115  os << in << in << "{\n";
116 
117  // Write placements.
118  os << in << in << in << "\"p\": [\n";
119  for( size_t j = 0; j < pquery.placement_size(); ++j ) {
120  auto const& placement = pquery.placement_at(j);
121  os << in << in << in << in << "[ ";
122 
123  os << placement.edge_num() << ", ";
124  os << placement.likelihood << ", ";
125  os << placement.like_weight_ratio << ", ";
126 
127  auto const& edge_data = placement.edge().data<PlacementEdgeData>();
128  os << edge_data.branch_length - placement.proximal_length << ", ";
129  os << placement.pendant_length;
130 
131  os << " ]";
132  if( j < pquery.placement_size() - 1 ) {
133  os << ",";
134  }
135  os << "\n";
136  }
137  os << in << in << in << "],\n";
138 
139  // Find out whether names have multiplicity.
140  bool has_nm = false;
141  for( auto const& pqry_name : pquery.names() ) {
142  has_nm |= ( pqry_name.multiplicity != 1.0 );
143  }
144 
145  // Write names.
146  if( has_nm ) {
147 
148  // With multiplicity.
149  os << in << in << in << "\"nm\": [\n";
150  for( size_t j = 0; j < pquery.name_size(); ++j ) {
151  os << in << in << in << in << "[ \"";
152  os << utils::escape( pquery.name_at(j).name ) << "\", ";
153  os << pquery.name_at(j).multiplicity << " ]";
154 
155  if( j < pquery.name_size() - 1 ) {
156  os << ", ";
157  }
158  os << "\n";
159  }
160  os << in << in << in << "]\n";
161 
162  } else {
163 
164  // Without multiplicity.
165  os << in << in << in << "\"n\": [ ";
166  for( size_t j = 0; j < pquery.name_size(); ++j ) {
167  os << "\"" << utils::escape( pquery.name_at(j).name ) << "\"";
168 
169  if( j < pquery.name_size() - 1 ) {
170  os << ", ";
171  }
172  }
173  os << " ]\n";
174 
175  }
176 
177  // Write end of placement stuff.
178  os << in << in << "}";
179  if( i < sample.size() - 1 ) {
180  os << ",";
181  }
182  os << "\n";
183  }
184  os << in << "]\n";
185 
186  // Close json document.
187  os << "}\n";
188 }
189 
191 {
192  using namespace utils;
193  JsonDocument doc = JsonDocument::object();
194 
195  // set tree
196  auto newick_writer = PlacementTreeNewickWriter();
197  newick_writer.enable_names(true);
198  newick_writer.enable_branch_lengths(true);
199  newick_writer.branch_length_precision( branch_length_precision_ );
200  newick_writer.trailing_new_line( false );
201  doc[ "tree" ] = newick_writer.to_string( smp.tree() );
202 
203  // set placements
204  auto& placements_arr = doc[ "placements" ];
205  for( auto const& pqry : smp.pqueries() ) {
206  auto jpqry = JsonDocument::object();
207 
208  // set placements
209  auto pqry_p_arr = JsonDocument::array();
210  for( auto const& pqry_place : pqry.placements() ) {
211  auto pqry_fields = JsonDocument::array();
212 
213  pqry_fields.push_back( JsonDocument::number_unsigned( pqry_place.edge_num() ));
214  pqry_fields.push_back( JsonDocument::number_float( pqry_place.likelihood ));
215  pqry_fields.push_back( JsonDocument::number_float( pqry_place.like_weight_ratio ));
216 
217  // convert from proximal to distal length.
218  auto const& edge_data = pqry_place.edge().data<PlacementEdgeData>();
219  pqry_fields.push_back( JsonDocument::number_float(
220  edge_data.branch_length - pqry_place.proximal_length
221  ));
222  pqry_fields.push_back( JsonDocument::number_float( pqry_place.pendant_length ));
223 
224  pqry_p_arr.push_back( pqry_fields );
225  }
226  jpqry[ "p" ] = pqry_p_arr;
227 
228  // find out whether names have multiplicity
229  bool has_nm = false;
230  for( auto const& pqry_name : pqry.names() ) {
231  has_nm |= ( pqry_name.multiplicity != 1.0 );
232  }
233 
234  // set named multiplicity / name
235  if (has_nm) {
236  auto pqry_nm_arr = JsonDocument::array();
237  for( auto const& pqry_name : pqry.names() ) {
238  auto pqry_nm_val = JsonDocument::array();
239  pqry_nm_val.push_back( pqry_name.name );
240  pqry_nm_val.push_back( JsonDocument::number_float( pqry_name.multiplicity ));
241  pqry_nm_arr.push_back( pqry_nm_val );
242  }
243  jpqry[ "nm" ] = pqry_nm_arr;
244  } else {
245  auto pqry_n_arr = JsonDocument::array();
246  for( auto const& pqry_name : pqry.names() ) {
247  pqry_n_arr.push_back( pqry_name.name );
248  }
249  jpqry[ "n" ] = pqry_n_arr;
250  }
251 
252  placements_arr.push_back( jpqry );
253  }
254 
255  // set fields
256  auto jfields = JsonDocument::array();
257  jfields.push_back( "edge_num" );
258  jfields.push_back( "likelihood" );
259  jfields.push_back( "like_weight_ratio" );
260  jfields.push_back( "distal_length" );
261  jfields.push_back( "pendant_length" );
262  doc[ "fields" ] = jfields;
263 
264  // set version
265  doc[ "version" ] = 3;
266 
267  // set metadata
268  auto jmetadata = JsonDocument::object();
269  jmetadata[ "program" ] = program_;
270  jmetadata[ "invocation" ] = invocation_;
271  jmetadata[ "created" ] = utils::current_date() + " " + utils::current_time();
272  doc[ "metadata" ] = jmetadata;
273 
274  return doc;
275 }
276 
277 } // namespace placement
278 } // namespace genesis
std::string escape(std::string const &text)
Return a string where special chars are replaced by their escape sequence.
Definition: string.cpp:495
std::string current_date()
Returns the current date as a string in the format "2014-12-31".
Definition: date_time.cpp:68
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
utils::JsonDocument to_document(Sample const &sample) const
Store the data of a Sample in a JsonDocument object.
PlacementEdgeData::EdgeNumType edge_num() const
Get the edge_num where this PqueryPlacement is placed.
Pquery & at(size_t index)
Return the Pquery at a certain index.
Definition: sample.cpp:185
PqueryPlacement & placement_at(size_t index)
Return the PqueryPlacement at a certain index.
Definition: pquery.cpp:118
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Provides some valuable additions to STD.
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
std::string current_time()
Returns the current time as a string in the format "13:37:42".
Definition: date_time.cpp:88
size_t size() const
Return the number of Pqueries that are stored in this Sample.
Definition: sample.cpp:133
Provides some commonly used string utility functions.
std::string command_line_string() const
Returns a string containing the program's command line arguments.
Definition: options.cpp:85
Provides functions for accessing the file system.
void write(Sample const &sample, std::shared_ptr< utils::BaseOutputTarget > target) const
Write a Sample to an output target, using the Jplace format.
Provides functions for date and time access.
Provides easy and fast logging functionality.
double branch_length
Branch length of the edge.
Store a Json value of any kind.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
static Options & get()
Returns a single instance of this class.
Definition: options.hpp:60
Some stuff that is totally not important, but nice.
std::string genesis_version()
Return the current genesis version.
Definition: version.hpp:63