A toolkit for working with phylogenetic data.
v0.24.0
fasta_writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
38 #include <cassert>
39 #include <fstream>
40 #include <sstream>
41 #include <stdexcept>
42 
43 namespace genesis {
44 namespace sequence {
45 
46 // =================================================================================================
47 // Writing
48 // =================================================================================================
49 
50 void FastaWriter::write( Sequence const& sequence, std::shared_ptr<utils::BaseOutputTarget> target ) const
51 {
52  write_sequence( sequence, target->ostream() );
53 }
54 
55 void FastaWriter::write( SequenceSet const& sequence_set, std::shared_ptr<utils::BaseOutputTarget> target ) const
56 {
57  auto& os = target->ostream();
58  for( Sequence const& sequence : sequence_set ) {
59  write_sequence( sequence, os );
60  }
61 }
62 
// Write a single Sequence to the stream `os` in Fasta format: one header line starting
// with ">" that carries the label (optionally decorated with the abundance, depending on
// abundance_notation_), followed by the sites, wrapped every line_length_ characters if
// line_length_ is greater than zero.
// NOTE(review): in this listing, the `case` labels of the switch (listing lines 68, 72, 76)
// are missing, so which abundance_notation_ value selects which branch cannot be confirmed
// from here.
63 void FastaWriter::write_sequence( Sequence const& seq, std::ostream& os ) const
64 {
65  // Write label.
66  os << ">";
67  switch( abundance_notation_ ) {
 // First branch: plain label, no abundance information.
69  os << seq.label();
70  break;
71  }
 // Second branch: label and abundance joined by an underscore, i.e. "label_abundance".
73  os << seq.label() << "_" << seq.abundance();
74  break;
75  }
 // Third branch: label followed by ";size=" and the abundance, i.e. "label;size=abundance".
77  os << seq.label() << ";size=" << seq.abundance();
78  break;
79  }
80  default: {
 // Not expected to be reached; the assert only fires in builds where asserts are enabled.
81  assert( false );
82  }
83  }
84  os << "\n";
85 
86  // Write sequence. If needed, add new line at every line_length_ position.
87  if (line_length_ > 0) {
88  for (size_t i = 0; i < seq.length(); i += line_length_) {
89  // Write line_length_ many characters.
90  // (If the string is shorter, as many characters as possible are used.)
91  os << seq.sites().substr(i, line_length_) << "\n";
92  }
93  } else {
 // A line_length_ of zero means no wrapping: all sites go on a single line.
94  os << seq.sites() << "\n";
95  }
96 }
97 
98 // =================================================================================================
99 // Properties
100 // =================================================================================================
101 
// Setter body: stores `value` in line_length_ and returns *this.
// NOTE(review): the signature line (listing line 102) is missing from this listing;
// presumably this is the setter counterpart of line_length() — confirm against the header.
103 {
104  line_length_ = value;
105  return *this;
106 }
107 
// Getter body: returns the current value of line_length_.
// NOTE(review): the signature line (listing line 108) is missing from this listing;
// presumably `size_t FastaWriter::line_length() const` — confirm against the header.
109 {
110  return line_length_;
111 }
112 
// Setter body: stores `value` in abundance_notation_ and returns *this.
// NOTE(review): the signature line (listing line 113) is missing from this listing;
// presumably this is the setter counterpart of abundance_notation() — confirm against the header.
114 {
115  abundance_notation_ = value;
116  return *this;
117 }
118 
// Getter body: returns the current value of abundance_notation_.
// NOTE(review): the signature line (listing line 119) is missing from this listing;
// presumably `AbundanceNotation FastaWriter::abundance_notation() const` — confirm against the header.
120 {
121  return abundance_notation_;
122 }
123 
124 } // namespace sequence
125 } // namespace genesis
AbundanceNotation
Enumeration of types for how to write Sequence abundances.
Write abundances appended as a text of the form abc;size=123
Write abundances appended by an underscore: abc_123.
size_t line_length() const
Get the current line length.
void write_sequence(Sequence const &sequence, std::ostream &os) const
Write a single Sequence to an output stream in Fasta format.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Do not write abundances. This is the default.
AbundanceNotation abundance_notation() const
Get the current setting for how to write abundances.
void write(Sequence const &sequence, std::shared_ptr< utils::BaseOutputTarget > target) const
Write a single Sequence to an output target, using the Fasta format.
Provides functions for accessing the file system.
Store a set of Sequences.
size_t length() const
Return the length (number of sites) of this sequence.