A library for working with phylogenetic and population genetic data.
v0.27.0
bed_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_BED_READER_H_
2 #define GENESIS_POPULATION_FORMATS_BED_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
38 
39 #include <functional>
40 #include <string>
41 #include <utility>
42 #include <vector>
43 
44 namespace genesis {
45 namespace population {
46 
47 // =================================================================================================
48 // BED Reader
49 // =================================================================================================
50 
90 class BedReader
91 {
92 public:
93 
94  // -------------------------------------------------------------------------
95  // Typedefs and Enums
96  // -------------------------------------------------------------------------
97 
105  struct Feature
106  {
107  std::string chrom;
108  size_t chrom_start;
109  size_t chrom_end;
110  std::string name;
111  size_t score;
112  char strand;
113  size_t thick_start;
114  size_t thick_end;
115  std::string item_rgb;
116  size_t block_count;
117  std::vector<size_t> block_sizes;
118  std::vector<size_t> block_starts;
119  };
120 
121  // -------------------------------------------------------------------------
122  // Constructors and Rule of Five
123  // -------------------------------------------------------------------------
124 
125  BedReader() = default;
126  ~BedReader() = default;
127 
128  BedReader( BedReader const& ) = default;
129  BedReader( BedReader&& ) = default;
130 
131  BedReader& operator= ( BedReader const& ) = default;
132  BedReader& operator= ( BedReader&& ) = default;
133 
134  // ---------------------------------------------------------------------
135  // Reading
136  // ---------------------------------------------------------------------
137 
141  std::vector<Feature> read(
142  std::shared_ptr< utils::BaseInputSource > source
143  ) const;
144 
157  std::shared_ptr< utils::BaseInputSource > source,
158  bool merge = false
159  ) const;
160 
167  std::shared_ptr< utils::BaseInputSource > source,
168  GenomeRegionList& target,
169  bool merge = false
170  ) const;
171 
172  // -------------------------------------------------------------------------
173  // Internal Helpers
174  // -------------------------------------------------------------------------
175 
176 private:
177 
182  void read_(
183  std::shared_ptr< utils::BaseInputSource > source,
184  std::function<void(Feature&&)> callback
185  ) const;
186 
194  size_t parse_line_(
195  utils::InputStream& input_stream,
196  Feature& feature
197  ) const;
198 
206  bool next_field_( utils::InputStream& input_stream, size_t& found_columns ) const;
207 
211  std::string parse_string_( utils::InputStream& input_stream ) const;
212 
213 };
214 
215 } // namespace population
216 } // namespace genesis
217 
218 #endif // include guard
genesis::population::BedReader::Feature::chrom_start
size_t chrom_start
Definition: bed_reader.hpp:108
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:81
genesis::population::BedReader::Feature::strand
char strand
Definition: bed_reader.hpp:112
genesis::population::BedReader::Feature::score
size_t score
Definition: bed_reader.hpp:111
genesis::population::BedReader::operator=
BedReader & operator=(BedReader const &)=default
genesis::population::BedReader::Feature::chrom
std::string chrom
Definition: bed_reader.hpp:107
genesis::population::BedReader::BedReader
BedReader()=default
genesis::population::GenomeRegionList
List of regions in a genome, for each chromosome.
Definition: genome_region_list.hpp:82
genesis::population::BedReader::Feature::thick_start
size_t thick_start
Definition: bed_reader.hpp:113
genesis::population::BedReader::Feature::block_count
size_t block_count
Definition: bed_reader.hpp:116
input_source.hpp
genome_region.hpp
input_stream.hpp
genesis::population::BedReader::Feature
Store all values that can typically appear in the columns of a BED file.
Definition: bed_reader.hpp:105
genesis::population::BedReader::Feature::chrom_end
size_t chrom_end
Definition: bed_reader.hpp:109
genesis::population::BedReader::Feature::thick_end
size_t thick_end
Definition: bed_reader.hpp:114
genome_region_list.hpp
genesis::population::BedReader::~BedReader
~BedReader()=default
genesis::population::merge
BaseCounts merge(BaseCounts const &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts objects.
Definition: population/functions/functions.cpp:372
genesis::population::BedReader::read
std::vector< Feature > read(std::shared_ptr< utils::BaseInputSource > source) const
Read a BED input source, and return its content as a list of Feature structs.
Definition: bed_reader.cpp:49
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::BedReader::Feature::block_starts
std::vector< size_t > block_starts
Definition: bed_reader.hpp:118
genesis::population::BedReader::Feature::block_sizes
std::vector< size_t > block_sizes
Definition: bed_reader.hpp:117
genesis::population::BedReader
Reader for BED (Browser Extensible Data) files.
Definition: bed_reader.hpp:90
genesis::population::BedReader::Feature::name
std::string name
Definition: bed_reader.hpp:110
genesis::population::BedReader::read_as_genome_region_list
GenomeRegionList read_as_genome_region_list(std::shared_ptr< utils::BaseInputSource > source, bool merge=false) const
Read a BED input source, and return its content as a GenomeRegionList.
Definition: bed_reader.cpp:59
genesis::population::BedReader::Feature::item_rgb
std::string item_rgb
Definition: bed_reader.hpp:115