A library for working with phylogenetic and population genetic data.
v0.32.0
phylip_writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
39 #include <cctype>
40 #include <fstream>
41 #include <sstream>
42 #include <stdexcept>
43 
44 namespace genesis {
45 namespace sequence {
46 
47 // =================================================================================================
48 // Writing
49 // =================================================================================================
50 
51 // void PhylipWriter::to_stream_interleaved( SequenceSet const& sequence_set, std::ostream& os ) const
52 // {
53 //
54 // }
55 
56 // void PhylipWriter::to_stream_sequential( SequenceSet const& sequence_set, std::ostream& os ) const
57 // {
58 //
59 // }
60 
61 void PhylipWriter::write( SequenceSet const& sequence_set, std::shared_ptr<utils::BaseOutputTarget> target ) const
62 {
63  // Shorthand
64  auto& os = target->ostream();
65 
66  // Check if this is an alignment and get its sequence length.
67  size_t length = 0;
68  for( Sequence const& s : sequence_set ) {
69  if (length == 0) {
70  length = s.length();
71  continue;
72  }
73  if (s.length() != length) {
74  throw std::runtime_error(
75  "Cannot write SequenceSet to Phylip format: Sequences do not have the same length."
76  );
77  }
78  }
79  if (length == 0) {
80  throw std::runtime_error( "Cannot write SequenceSet to Phylip format: Sequences are empty." );
81  }
82 
83  // Write sequences to stream.
84  os << sequence_set.size() << " " << length << "\n";
85  for (Sequence const& s : sequence_set) {
86  // Write label.
87  if( label_length_ == 0 ) {
88  auto const f = std::find_if( std::begin(s.label()), std::end(s.label()), [](char c){
89  return ! ::isgraph(c);
90  });
91  if( std::end(s.label()) != f ) {
92  throw std::runtime_error(
93  "Cannot write Sequence to Philip: Sequence label \"" + s.label() +
94  "\" contains non-printable or whitespace characters, "
95  "which cannot be used in the relaxed Phylip format."
96  );
97  }
98  os << s.label() << " ";
99  } else {
100  os << s.label().substr( 0, label_length_ );
101  if( label_length_ > s.label().length() ) {
102  os << std::string( label_length_ - s.label().length(), ' ' );
103  }
104  }
105 
106  // Write sequence. If needed, add new line at every line_length_ position.
107  if( line_length_ > 0 ) {
108  for( size_t i = 0; i < s.length(); i += line_length_ ) {
109  // Write line_length_ many characters.
110  // (If the string is shorter, as many characters as possible are used.)
111  os << s.sites().substr( i, line_length_ ) << "\n";
112  }
113  } else {
114  os << s.sites() << "\n";
115  }
116  }
117 }
118 
119 // =================================================================================================
120 // Properties
121 // =================================================================================================
122 
124 {
125  label_length_ = value;
126  return *this;
127 }
128 
130 {
131  return label_length_;
132 }
133 
135 {
136  line_length_ = value;
137  return *this;
138 }
139 
141 {
142  return line_length_;
143 }
144 
145 } // namespace sequence
146 } // namespace genesis
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::sequence::PhylipWriter::line_length
size_t line_length() const
Get the current line length.
Definition: phylip_writer.cpp:140
fs.hpp
Provides functions for accessing the file system.
genesis::sequence::Sequence
Definition: sequence/sequence.hpp:40
genesis::sequence::PhylipWriter
Definition: phylip_writer.hpp:52
output_stream.hpp
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
sequence_set.hpp
genesis::sequence::PhylipWriter::label_length
size_t label_length() const
Return the currently set label length.
Definition: phylip_writer.cpp:129
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::SequenceSet
Store a set of Sequences.
Definition: sequence_set.hpp:53
genesis::sequence::PhylipWriter::write
void write(SequenceSet const &sequence_set, std::shared_ptr< utils::BaseOutputTarget > target) const
Write a SequenceSet to an output target, using the Phylip format.
Definition: phylip_writer.cpp:61
phylip_writer.hpp
sequence.hpp