A library for working with phylogenetic and population genetic data.
v0.32.0
gff_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_GFF_READER_H_
2 #define GENESIS_POPULATION_FORMAT_GFF_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
39 
40 #include <string>
41 #include <utility>
42 #include <vector>
43 
44 namespace genesis {
45 namespace population {
46 
47 // =================================================================================================
48 // GFF/GTF Reader
49 // =================================================================================================
50 
69 class GffReader
70 {
71 public:
72 
73  // -------------------------------------------------------------------------
74  // Typedefs and Enums
75  // -------------------------------------------------------------------------
76 
77  // using Attribute = std::pair<std::string, std::string>;
78 
82  struct Feature
83  {
84  std::string seqname;
85  std::string source;
86  std::string feature;
87  size_t start;
88  size_t end;
89  double score;
90  char strand;
91  signed char frame;
92  std::string attributes_group;
93  // std::vector<Attribute> attributes;
94  };
95 
96  // -------------------------------------------------------------------------
97  // Constructors and Rule of Five
98  // -------------------------------------------------------------------------
99 
100  GffReader() = default;
101  ~GffReader() = default;
102 
103  GffReader( GffReader const& ) = default;
104  GffReader( GffReader&& ) = default;
105 
106  GffReader& operator= ( GffReader const& ) = default;
107  GffReader& operator= ( GffReader&& ) = default;
108 
109  // ---------------------------------------------------------------------
110  // Reading
111  // ---------------------------------------------------------------------
112 
116  std::vector<Feature> read( std::shared_ptr< utils::BaseInputSource > source ) const;
117 
128  std::shared_ptr< utils::BaseInputSource > source
129  ) const;
130 
143  std::shared_ptr< utils::BaseInputSource > source,
144  bool merge = false
145  ) const;
146 
153  std::shared_ptr< utils::BaseInputSource > source,
154  GenomeRegionList& target,
155  bool merge = false
156  ) const;
157 
158  // -------------------------------------------------------------------------
159  // Parsing
160  // -------------------------------------------------------------------------
161 
162  bool parse_line(
163  utils::InputStream& input_stream,
164  Feature& feature
165  ) const;
166 
167 };
168 
169 } // namespace population
170 } // namespace genesis
171 
172 #endif // include guard
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::population::merge
SampleCounts merge(SampleCounts const &p1, SampleCounts const &p2)
Merge the counts of two SampleCounts objects.
Definition: population/function/functions.cpp:400
genesis::population::GffReader::Feature::feature
std::string feature
Definition: gff_reader.hpp:86
genesis::population::GffReader::Feature::frame
signed char frame
Definition: gff_reader.hpp:91
genesis::population::GffReader::Feature::source
std::string source
Definition: gff_reader.hpp:85
genesis::population::GffReader::Feature::start
size_t start
Definition: gff_reader.hpp:87
genesis::population::GffReader::parse_line
bool parse_line(utils::InputStream &input_stream, Feature &feature) const
Definition: gff_reader.cpp:100
genesis::population::GenomeLocusSet
List of positions/coordinates in a genome, for each chromosome.
Definition: genome_locus_set.hpp:75
genesis::population::GffReader::Feature
Definition: gff_reader.hpp:82
genesis::population::GffReader::operator=
GffReader & operator=(GffReader const &)=default
genesis::population::GenomeRegionList
List of regions in a genome, for each chromosome.
Definition: genome_region_list.hpp:95
genesis::population::GffReader::Feature::attributes_group
std::string attributes_group
Definition: gff_reader.hpp:92
genesis::population::GffReader::Feature::seqname
std::string seqname
Definition: gff_reader.hpp:84
input_source.hpp
genome_region.hpp
input_stream.hpp
genesis::population::GffReader::read
std::vector< Feature > read(std::shared_ptr< utils::BaseInputSource > source) const
Read a GFF2/GFF3/GTF input source, and return its content as a list of Feature structs.
Definition: gff_reader.cpp:51
genome_region_list.hpp
genesis::population::GffReader::read_as_genome_locus_set
GenomeLocusSet read_as_genome_locus_set(std::shared_ptr< utils::BaseInputSource > source) const
Read an input source, and return its content as a GenomeLocusSet.
Definition: gff_reader.cpp:63
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::GffReader::Feature::end
size_t end
Definition: gff_reader.hpp:88
genome_locus_set.hpp
genesis::population::GffReader
Reader for GFF2 and GFF3 (General Feature Format) and GTF (General Transfer Format) files.
Definition: gff_reader.hpp:69
genesis::population::GffReader::GffReader
GffReader()=default
genesis::population::GffReader::read_as_genome_region_list
GenomeRegionList read_as_genome_region_list(std::shared_ptr< utils::BaseInputSource > source, bool merge=false) const
Read a GFF2/GFF3/GTF input source, and return its content as a GenomeRegionList.
Definition: gff_reader.cpp:75
genesis::population::GffReader::Feature::strand
char strand
Definition: gff_reader.hpp:90
genesis::population::GffReader::~GffReader
~GffReader()=default
genesis::population::GffReader::Feature::score
double score
Definition: gff_reader.hpp:89