|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
69 read( source, result );
103 if( num_seq_str.length() == 0 ) {
104 throw std::runtime_error(
105 "Malformed Phylip " + it.
source_name() +
": Expecting sequence number at "
114 if( len_seq_str.length() == 0 ) {
115 throw std::runtime_error(
116 "Malformed Phylip " + it.
source_name() +
": Expecting sequence length at "
124 throw std::runtime_error(
125 "Malformed Phylip " + it.
source_name() +
": Sequences are empty."
132 if( !it || *it !=
'\n' ) {
133 throw std::runtime_error(
134 "Malformed Phylip " + it.
source_name() +
": Expecting end of line at " + it.
at() +
"."
147 if( !it || ! ::isgraph( *it ) ) {
148 throw std::runtime_error(
149 "Malformed Phylip " + it.
source_name() +
": Expecting label at " + it.
at() +
"."
154 if( label_length_ == 0 ) {
156 if( !it || ! ::isblank( *it ) ) {
157 throw std::runtime_error(
158 "Malformed Phylip " + it.
source_name() +
": Expecting delimiting white space at "
166 for(
size_t i = 0; i < label_length_; ++i ) {
167 if( !it || ! ::isprint( *it ) ) {
168 throw std::runtime_error(
169 "Malformed Phylip " + it.
source_name() +
": Invalid label at " + it.
at() +
"."
180 assert( label.size() > 0 );
191 return c ==
' ' || c ==
'\t';
193 if( remove_digits_ ) {
195 return ::isdigit( c );
207 if( use_validation_ ) {
208 for(
auto const& c : seq ) {
210 throw std::runtime_error(
211 "Malformed Phylip " + it.
source_name() +
": Invalid sequence symbol "
226 size_t num_seq = header.num_sequences;
227 size_t len_seq = header.len_sequences;
231 for(
size_t seq_n = 0; seq_n < num_seq; ++seq_n ) {
232 assert( it.
column() == 1 );
242 seq.
sites().reserve( len_seq );
243 while( seq.
sites().length() < len_seq ) {
245 assert( it.
column() == 1 );
249 if( seq.
sites().length() > len_seq ) {
250 throw std::runtime_error(
251 "Malformed Phylip " + it.
source_name() +
": Sequence with length "
256 assert( seq.
sites().length() == len_seq );
265 throw std::runtime_error(
266 "Malformed Phylip " + it.
source_name() +
": Expected end of file at " + it.
at() +
"."
269 assert( sset.
size() == num_seq );
276 size_t num_seq = header.num_sequences;
277 size_t len_seq = header.len_sequences;
280 auto check_seq_len = [ &it, &len_seq ] (
Sequence const& seq ) {
281 if( seq.length() > len_seq ) {
282 throw std::runtime_error(
283 "Malformed Phylip " + it.
source_name() +
": Sequence with length "
291 for(
size_t seq_n = 0; seq_n < num_seq; ++seq_n ) {
292 assert( it.
column() == 1 );
299 seq.
sites().reserve( len_seq );
301 check_seq_len( seq );
309 auto unfinished_sequences = [ & ] () {
310 for(
auto const& seq : sset ) {
311 assert( seq.length() <= len_seq );
312 if( seq.length() < len_seq ) {
319 while( unfinished_sequences() ) {
322 throw std::runtime_error(
323 "Malformed Phylip " + it.
source_name() +
": Unexpected end of file at "
332 for(
size_t seq_n = 0; seq_n < num_seq; ++seq_n ) {
333 assert( it.
column() == 1 );
335 check_seq_len( sset[seq_n] );
339 assert( sset.
size() == num_seq );
359 label_length_ = value;
365 return label_length_;
370 site_casing_ = value;
381 remove_digits_ = value;
387 return remove_digits_;
392 if( chars.size() == 0 ) {
394 use_validation_ =
false;
398 use_validation_ =
true;
408 if( ! use_validation_ || lookup_.
all_equal_to(
true ) ) {
@ kToLower
Make all sites lower case.
Provides some valuable algorithms that are not part of the C++ 11 STL.
@ kToUpper
Make all sites upper case.
void parse_phylip_interleaved(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces (or any other delimiters).
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one....
Provides functions for accessing the file system.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
bool all_equal_to(T comp_value) const
Return whether all chars compare equal to a given value.
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces (or any other delimiters).
std::string parse_phylip_label(utils::InputStream &it) const
Parse and return a Phylip label.
Provides some valuable additions to STD.
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
std::string to_string(GenomeLocus const &locus)
Provides some commonly used string utility functions.
SequenceSet read(std::shared_ptr< utils::BaseInputSource > source) const
Read all Sequences from an input source in Phylip format and return them as a SequenceSet.
Header parse_phylip_header(utils::InputStream &it) const
Parse a Phylip header and return the contained sequence count and length.
Read Phylip sequence data.
void erase_if(Container &c, UnaryPredicate p)
Erases all elements from the container that satisfy a given predicate. An element is erased,...
std::string to_upper_ascii(std::string const &str)
Return an all-uppercase copy of the given string, ASCII-only.
Mode
Enum to distinguish between the different file variants of Phylip. See mode( Mode value ) for more details.
void skip_while(InputStream &source, char criterion)
Lexing function that advances the stream while its current char equals the provided one.
std::string get_chars_equal_to(T comp_value) const
Return a std::string containing all chars which have lookup status equal to a given value.
std::string parse_phylip_sequence_line(utils::InputStream &it) const
Parse one sequence line.
void parse_phylip_sequential(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the sequential variant (Mode::kSequential).
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
@ kInterleaved
Read the data in Phylip interleaved mode.
Store a set of Sequences.
reference add(Sequence const &s)
Add a Sequence to the SequenceSet by copying it, and return a reference to it.
size_t label_length() const
Return the currently set label length.
std::string to_lower_ascii(std::string const &str)
Return an all-lowercase copy of the given string, ASCII-only.
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
void set_all(T value)
Set the lookup status for all chars at once.
PhylipReader()
Create a default PhylipReader. By default, chars are converted to upper case, but not validated.
void set_selection(std::string const &chars, T value)
Set the lookup status for all chars that are contained in a given std::string.
std::string read_to_end_of_line(InputStream &source)
Lexing function that reads until the end of the line (i.e., to the new line char),...
bool remove_digits() const
Return whether digits are removed from the Sequence.
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
@ kSequential
Read the data in Phylip sequential mode.