A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fasta_writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
38 #include <cassert>
39 #include <fstream>
40 #include <sstream>
41 #include <stdexcept>
42 
43 namespace genesis {
44 namespace sequence {
45 
46 // =================================================================================================
47 // Writing
48 // =================================================================================================
49 
/**
 * Write a single Sequence to an output stream in Fasta format.
 *
 * The record consists of a header line, ">" followed by the label, and then the sites.
 * Depending on abundance_notation_, the header is either the plain label, the label followed
 * by "_" and the abundance, or the label followed by ";size=" and the abundance.
 *
 * If line_length_ is greater than zero, the sites are written in chunks of at most
 * line_length_ characters, each terminated by "\n". Otherwise, all sites are written on a
 * single line terminated by "\n".
 *
 * NOTE(review): for a sequence where seq.length() is 0, the wrapped branch writes no sites line
 * at all, while the unwrapped branch writes an empty line. Confirm that this difference
 * is intended.
 *
 * @param seq Sequence whose label, abundance and sites are written.
 * @param os  Stream that receives the Fasta record.
 */
50 void FastaWriter::write_sequence( Sequence const& seq, std::ostream& os ) const
51 {
52  // Write label.
53  os << ">";
54  switch( abundance_notation_ ) {
 // NOTE(review): the case labels of this switch (listing lines 55, 59 and 63) are not
 // visible in this listing. The three branches below write, in order: the label only,
 // label + "_" + abundance, and label + ";size=" + abundance.
56  os << seq.label();
57  break;
58  }
60  os << seq.label() << "_" << seq.abundance();
61  break;
62  }
64  os << seq.label() << ";size=" << seq.abundance();
65  break;
66  }
 // Any other value of abundance_notation_ is treated as a programming error.
 // With assertions disabled, nothing is appended after the ">" in this case.
67  default: {
68  assert( false );
69  }
70  }
 // Terminate the header line.
71  os << "\n";
72 
73  // Write sequence. If needed, add new line at every line_length_ position.
74  if (line_length_ > 0) {
75  for (size_t i = 0; i < seq.length(); i += line_length_) {
76  // Write line_length_ many characters.
77  // (If the string is shorter, as many characters as possible are used.)
78  os << seq.sites().substr(i, line_length_) << "\n";
79  }
80  } else {
 // No wrapping requested: emit all sites on one line.
81  os << seq.sites() << "\n";
82  }
83 }
84 
85 void FastaWriter::to_stream( SequenceSet const& sset, std::ostream& os ) const
86 {
87  for( Sequence const& seq : sset ) {
88  write_sequence( seq, os );
89  }
90 }
91 
92 void FastaWriter::to_file( SequenceSet const& sset, std::string const& filename ) const
93 {
94  std::ofstream ofs;
95  utils::file_output_stream( filename, ofs );
96  to_stream( sset, ofs );
97 }
98 
99 std::string FastaWriter::to_string ( SequenceSet const& sset ) const
100 {
101  std::ostringstream oss;
102  to_stream( sset, oss );
103  return oss.str();
104 }
105 
106 // =================================================================================================
107 // Properties
108 // =================================================================================================
109 
111 {
112  line_length_ = value;
113  return *this;
114 }
115 
117 {
118  return line_length_;
119 }
120 
122 {
123  abundance_notation_ = value;
124  return *this;
125 }
126 
128 {
129  return abundance_notation_;
130 }
131 
132 } // namespace sequence
133 } // namespace genesis
size_t abundance() const
Definition: sequence.cpp:82
AbundanceNotation
Enumeration of types for how to write Sequence abundances.
void to_stream(SequenceSet const &sset, std::ostream &os) const
Write Sequences of a SequenceSet to a stream, using the Fasta format.
void to_file(SequenceSet const &sset, std::string const &fn) const
Write Sequences of a SequenceSet to a file, using the Fasta format.
Write abundances appended to the label as text of the form abc;size=123.
size_t line_length() const
Get the current line length.
void file_output_stream(std::string const &filename, std::ofstream &out_stream, std::ios_base::openmode mode=std::ios_base::out)
Helper function to obtain an output stream to a file.
size_t length() const
Return the length (number of sites) of this sequence.
Definition: sequence.cpp:106
std::string const & sites() const
Definition: sequence.cpp:58
Write abundances appended by an underscore: abc_123.
Do not write abundances. This is the default.
std::string const & label() const
Definition: sequence.cpp:44
Provides functions for accessing the file system.
Store a set of Sequences.
void write_sequence(Sequence const &seq, std::ostream &os) const
Write a single Sequence to an output stream in Fasta format.
AbundanceNotation abundance_notation() const
Get the current setting for how to write abundances.
std::string to_string(SequenceSet const &sset) const
Return Sequences of a SequenceSet in form of a Fasta formatted string.