A toolkit for working with phylogenetic data.
v0.24.0
phylip_writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
38 
39 #include <cctype>
40 #include <fstream>
41 #include <sstream>
42 #include <stdexcept>
43 
44 namespace genesis {
45 namespace sequence {
46 
47 // =================================================================================================
48 // Writing
49 // =================================================================================================
50 
51 // void PhylipWriter::to_stream_interleaved( SequenceSet const& sequence_set, std::ostream& os ) const
52 // {
53 //
54 // }
55 
56 // void PhylipWriter::to_stream_sequential( SequenceSet const& sequence_set, std::ostream& os ) const
57 // {
58 //
59 // }
60 
61 void PhylipWriter::write( SequenceSet const& sequence_set, std::shared_ptr<utils::BaseOutputTarget> target ) const
62 {
63  // Shorthand
64  auto& os = target->ostream();
65 
66  // Check if this is an alignment and get its sequence length.
67  size_t length = 0;
68  for( Sequence const& s : sequence_set ) {
69  if (length == 0) {
70  length = s.length();
71  continue;
72  }
73  if (s.length() != length) {
74  throw std::runtime_error(
75  "Cannot write SequenceSet to Phylip format: Sequences do not have the same length."
76  );
77  }
78  }
79  if (length == 0) {
80  throw std::runtime_error( "Cannot write SequenceSet to Phylip format: Sequences are empty." );
81  }
82 
83  // Write sequences to stream.
84  os << sequence_set.size() << " " << length << "\n";
85  for (Sequence const& s : sequence_set) {
86  // Write label.
87  if( label_length_ == 0 ) {
88  auto const f = std::find_if( std::begin(s.label()), std::end(s.label()), ::isgraph );
89  if( std::end(s.label()) != f ) {
90  throw std::runtime_error(
91  "Cannot write Sequence to Philip: Sequence label contains non-printable or "
92  "whitespace characters, which cannot be used in the relaxed Phylip format."
93  );
94  }
95  os << s.label() << " ";
96  } else {
97  os << s.label().substr( 0, label_length_ );
98  if( label_length_ > s.label().length() ) {
99  os << std::string( label_length_ - s.label().length(), ' ' );
100  }
101  }
102 
103  // Write sequence. If needed, add new line at every line_length_ position.
104  if( line_length_ > 0 ) {
105  for( size_t i = 0; i < s.length(); i += line_length_ ) {
106  // Write line_length_ many characters.
107  // (If the string is shorter, as many characters as possible are used.)
108  os << s.sites().substr( i, line_length_ ) << "\n";
109  }
110  } else {
111  os << s.sites() << "\n";
112  }
113  }
114 }
115 
116 // =================================================================================================
117 // Properties
118 // =================================================================================================
119 
121 {
122  label_length_ = value;
123  return *this;
124 }
125 
127 {
128  return label_length_;
129 }
130 
132 {
133  line_length_ = value;
134  return *this;
135 }
136 
138 {
139  return line_length_;
140 }
141 
142 } // namespace sequence
143 } // namespace genesis
Provides some valuable algorithms that are not part of the C++ 11 STL.
size_t line_length() const
Get the current line length.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
size_t label_length() const
Return the currently set label length.
Provides functions for accessing the file system.
Store a set of Sequences.
void write(SequenceSet const &sequence_set, std::shared_ptr< utils::BaseOutputTarget > target) const
Write a SequenceSet to an output target, using the Phylip format.
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.