A library for working with phylogenetic and population genetic data.
v0.27.0
gff_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_GFF_READER_H_
2 #define GENESIS_POPULATION_FORMATS_GFF_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
38 
39 #include <string>
40 #include <utility>
41 #include <vector>
42 
43 namespace genesis {
44 namespace population {
45 
46 // =================================================================================================
47 // GFF/GTF Reader
48 // =================================================================================================
49 
// Reader for GFF2 and GFF3 (General Feature Format) and GTF (General Transfer Format) files.
//
// No data members are visible in this listing, and every reading/parsing member function
// shown here is declared const.
//
// NOTE(review): this listing is incomplete. Its numbered lines 128 and 138 (the opening
// lines of two member declarations in the "Reading" section) are missing; see the notes there.
68 class GffReader
69 {
70 public:
71 
72  // -------------------------------------------------------------------------
73  // Typedefs and Enums
74  // -------------------------------------------------------------------------
75 
 // Disabled key/value alias; it pairs with the commented-out `attributes` member of Feature.
76  // using Attribute = std::pair<std::string, std::string>;
77 
 // Element type of the list returned by read(), and the output parameter of parse_line().
 //
 // NOTE(review): the member names look like the columns of a GFF2/GTF record
 // (seqname, source, feature, start, end, score, strand, frame, attributes) - confirm
 // against the format specification before relying on units or coordinate conventions.
 //
 // NOTE(review): start, end, score, strand and frame have no default member initializers,
 // so a default-initialized Feature holds indeterminate values in them until assigned.
81  struct Feature
82  {
83  std::string seqname;
84  std::string source;
85  std::string feature;
86  size_t start;
87  size_t end;
88  double score;
89  char strand;
90  signed char frame;
 // Single string for the attributes; presumably the raw, unsplit attribute column,
 // given that the per-attribute vector below is commented out - TODO confirm.
91  std::string attributes_group;
92  // std::vector<Attribute> attributes;
93  };
94 
95  // -------------------------------------------------------------------------
96  // Constructors and Rule of Five
97  // -------------------------------------------------------------------------
98 
 // Default constructor, destructor, copy/move construction and copy/move assignment
 // are all explicitly defaulted.
99  GffReader() = default;
100  ~GffReader() = default;
101 
102  GffReader( GffReader const& ) = default;
103  GffReader( GffReader&& ) = default;
104 
105  GffReader& operator= ( GffReader const& ) = default;
106  GffReader& operator= ( GffReader&& ) = default;
107 
108  // ---------------------------------------------------------------------
109  // Reading
110  // ---------------------------------------------------------------------
111 
 // Read a GFF2/GFF3/GTF input source, and return its content as a list of Feature structs.
115  std::vector<Feature> read( std::shared_ptr< utils::BaseInputSource > source ) const;
116 
 // NOTE(review): the opening line of this declaration (listing line 128) is missing here.
 // The symbol index further down this page gives the full signature as
 //   GenomeRegionList read_as_genome_region_list(
 //       std::shared_ptr< utils::BaseInputSource > source, bool merge=false ) const
 // described as: read a GFF2/GFF3/GTF input source, and return its content as a
 // GenomeRegionList. What `merge` changes is not visible in this header.
129  std::shared_ptr< utils::BaseInputSource > source,
130  bool merge = false
131  ) const;
132 
 // NOTE(review): the opening line of this declaration (listing line 138) is also missing,
 // so its name and return type cannot be read from this listing. The visible parameters
 // are the input source, a GenomeRegionList taken by non-const reference (`target`),
 // and the same defaulted `merge` flag - presumably an overload that stores the result
 // in `target`; confirm against the original header.
139  std::shared_ptr< utils::BaseInputSource > source,
140  GenomeRegionList& target,
141  bool merge = false
142  ) const;
143 
144  // -------------------------------------------------------------------------
145  // Parsing
146  // -------------------------------------------------------------------------
147 
 // Takes an input stream and a Feature, both by non-const reference, and returns a bool.
 // NOTE(review): presumably parses one line into `feature` and reports whether a record
 // was read - the meaning of the return value is defined in gff_reader.cpp, not here.
148  bool parse_line(
149  utils::InputStream& input_stream,
150  Feature& feature
151  ) const;
152 
153 };
154 
155 } // namespace population
156 } // namespace genesis
157 
158 #endif // include guard
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:81
genesis::population::GffReader::Feature::feature
std::string feature
Definition: gff_reader.hpp:85
genesis::population::GffReader::Feature::frame
signed char frame
Definition: gff_reader.hpp:90
genesis::population::GffReader::Feature::source
std::string source
Definition: gff_reader.hpp:84
genesis::population::GffReader::Feature::start
size_t start
Definition: gff_reader.hpp:86
genesis::population::GffReader::parse_line
bool parse_line(utils::InputStream &input_stream, Feature &feature) const
Definition: gff_reader.cpp:88
genesis::population::GffReader::Feature
Definition: gff_reader.hpp:81
genesis::population::GffReader::operator=
GffReader & operator=(GffReader const &)=default
genesis::population::GenomeRegionList
List of regions in a genome, for each chromosome.
Definition: genome_region_list.hpp:82
genesis::population::GffReader::Feature::attributes_group
std::string attributes_group
Definition: gff_reader.hpp:91
genesis::population::GffReader::Feature::seqname
std::string seqname
Definition: gff_reader.hpp:83
input_source.hpp
genome_region.hpp
input_stream.hpp
genesis::population::GffReader::read
std::vector< Feature > read(std::shared_ptr< utils::BaseInputSource > source) const
Read a GFF2/GFF3/GTF input source, and return its content as a list of Feature structs.
Definition: gff_reader.cpp:51
genome_region_list.hpp
genesis::population::merge
BaseCounts merge(BaseCounts const &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts objects.
Definition: population/functions/functions.cpp:372
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::GffReader::Feature::end
size_t end
Definition: gff_reader.hpp:87
genesis::population::GffReader
Reader for GFF2 and GFF3 (General Feature Format) and GTF (General Transfer Format) files.
Definition: gff_reader.hpp:68
genesis::population::GffReader::GffReader
GffReader()=default
genesis::population::GffReader::read_as_genome_region_list
GenomeRegionList read_as_genome_region_list(std::shared_ptr< utils::BaseInputSource > source, bool merge=false) const
Read a GFF2/GFF3/GTF input source, and return its content as a GenomeRegionList.
Definition: gff_reader.cpp:63
genesis::population::GffReader::Feature::strand
char strand
Definition: gff_reader.hpp:89
genesis::population::GffReader::~GffReader
~GffReader()=default
genesis::population::GffReader::Feature::score
double score
Definition: gff_reader.hpp:88