A library for working with phylogenetic and population genetic data.
v0.32.0
bed_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMAT_BED_READER_H_
2 #define GENESIS_POPULATION_FORMAT_BED_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
39 
40 #include <functional>
41 #include <string>
42 #include <utility>
43 #include <vector>
44 
45 namespace genesis {
46 namespace population {
47 
48 // =================================================================================================
49 // BED Reader
50 // =================================================================================================
51 
// Reader for BED (Browser Extensible Data) files.
//
// NOTE(review): this is an extracted source listing; the original doc comments and the opening
// lines of several member declarations are absent here (see the notes at the affected spots).
91 class BedReader
92 {
93 public:
94 
95  // -------------------------------------------------------------------------
96  // Typedefs and Enums
97  // -------------------------------------------------------------------------
98 
 // Store all values that can typically appear in the columns of a BED file.
 // One member per column; the members carry no in-class initializers.
106  struct Feature
107  {
108  std::string chrom;
109  size_t chrom_start;
110  size_t chrom_end;
111  std::string name;
112  size_t score;
113  char strand;
114  size_t thick_start;
115  size_t thick_end;
116  std::string item_rgb;
117  size_t block_count;
118  std::vector<size_t> block_sizes;
119  std::vector<size_t> block_starts;
120  };
121 
122  // -------------------------------------------------------------------------
123  // Constructors and Rule of Five
124  // -------------------------------------------------------------------------
125 
 // All special member functions are explicitly defaulted.
126  BedReader() = default;
127  ~BedReader() = default;
128 
129  BedReader( BedReader const& ) = default;
130  BedReader( BedReader&& ) = default;
131 
132  BedReader& operator= ( BedReader const& ) = default;
133  BedReader& operator= ( BedReader&& ) = default;
134 
135  // ---------------------------------------------------------------------
136  // Reading
137  // ---------------------------------------------------------------------
138 
 // Read a BED input source, and return its content as a list of Feature structs.
142  std::vector<Feature> read(
143  std::shared_ptr< utils::BaseInputSource > source
144  ) const;
145 
 // NOTE(review): the opening line of this declaration is missing from the listing.
 // The symbol index of this page gives it as
 //   GenomeLocusSet read_as_genome_locus_set(std::shared_ptr< utils::BaseInputSource > source) const
 // with the description: Read an input source, and return its content as a GenomeLocusSet.
156  std::shared_ptr< utils::BaseInputSource > source
157  ) const;
158 
 // NOTE(review): the opening line of this declaration is missing from the listing.
 // The symbol index of this page gives it as
 //   GenomeRegionList read_as_genome_region_list(std::shared_ptr< utils::BaseInputSource > source, bool merge=false) const
 // with the description: Read a BED input source, and return its content as a GenomeRegionList.
 // The effect of `merge` is not described on this page.
171  std::shared_ptr< utils::BaseInputSource > source,
172  bool merge = false
173  ) const;
174 
 // NOTE(review): the opening line of this declaration is missing from the listing, so its name
 // and return type are not visible. Presumably an overload of read_as_genome_region_list that
 // adds the regions to the caller-provided `target` instead of returning a new list - TODO confirm
 // against the original header.
181  std::shared_ptr< utils::BaseInputSource > source,
182  GenomeRegionList& target,
183  bool merge = false
184  ) const;
185 
186  // -------------------------------------------------------------------------
187  // Internal Helpers
188  // -------------------------------------------------------------------------
189 
190 private:
191 
 // Internal reading helper that takes an input source and a callback accepting a Feature rvalue.
 // Presumably the shared implementation behind the public read functions, calling `callback`
 // once per parsed feature - TODO confirm in bed_reader.cpp.
196  void read_(
197  std::shared_ptr< utils::BaseInputSource > source,
198  std::function<void(Feature&&)> callback
199  ) const;
200 
 // Parse from `input_stream` into the caller-provided `feature`.
 // NOTE(review): the meaning of the size_t return value is not shown here; presumably the
 // number of columns found in the line (cf. `found_columns` of next_field_) - TODO confirm.
208  size_t parse_line_(
209  utils::InputStream& input_stream,
210  Feature& feature
211  ) const;
212 
 // Helper operating on the stream and a column counter passed by reference.
 // NOTE(review): presumably advances to the next field of the current line, updates
 // `found_columns`, and reports whether another field follows - TODO confirm.
220  bool next_field_( utils::InputStream& input_stream, size_t& found_columns ) const;
221 
 // Helper that reads from the stream and returns a string.
 // NOTE(review): presumably returns the text of the current column - TODO confirm.
225  std::string parse_string_( utils::InputStream& input_stream ) const;
226 
227 };
228 
229 } // namespace population
230 } // namespace genesis
231 
232 #endif // include guard
genesis::population::BedReader::Feature::chrom_start
size_t chrom_start
Definition: bed_reader.hpp:109
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::population::BedReader::Feature::strand
char strand
Definition: bed_reader.hpp:113
genesis::population::merge
SampleCounts merge(SampleCounts const &p1, SampleCounts const &p2)
Merge the counts of two SampleCounts objects.
Definition: population/function/functions.cpp:400
genesis::population::BedReader::Feature::score
size_t score
Definition: bed_reader.hpp:112
genesis::population::GenomeLocusSet
List of positions/coordinates in a genome, for each chromosome.
Definition: genome_locus_set.hpp:75
genesis::population::BedReader::operator=
BedReader & operator=(BedReader const &)=default
genesis::population::BedReader::Feature::chrom
std::string chrom
Definition: bed_reader.hpp:108
genesis::population::BedReader::BedReader
BedReader()=default
genesis::population::GenomeRegionList
List of regions in a genome, for each chromosome.
Definition: genome_region_list.hpp:95
genesis::population::BedReader::Feature::thick_start
size_t thick_start
Definition: bed_reader.hpp:114
genesis::population::BedReader::Feature::block_count
size_t block_count
Definition: bed_reader.hpp:117
input_source.hpp
genome_region.hpp
input_stream.hpp
genesis::population::BedReader::Feature
Store all values that can typically appear in the columns of a BED file.
Definition: bed_reader.hpp:106
genesis::population::BedReader::Feature::chrom_end
size_t chrom_end
Definition: bed_reader.hpp:110
genesis::population::BedReader::Feature::thick_end
size_t thick_end
Definition: bed_reader.hpp:115
genome_region_list.hpp
genesis::population::BedReader::~BedReader
~BedReader()=default
genesis::population::BedReader::read
std::vector< Feature > read(std::shared_ptr< utils::BaseInputSource > source) const
Read a BED input source, and return its content as a list of Feature structs.
Definition: bed_reader.cpp:49
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genome_locus_set.hpp
genesis::population::BedReader::Feature::block_starts
std::vector< size_t > block_starts
Definition: bed_reader.hpp:119
genesis::population::BedReader::read_as_genome_locus_set
GenomeLocusSet read_as_genome_locus_set(std::shared_ptr< utils::BaseInputSource > source) const
Read an input source, and return its content as a GenomeLocusSet.
Definition: bed_reader.cpp:59
genesis::population::BedReader::Feature::block_sizes
std::vector< size_t > block_sizes
Definition: bed_reader.hpp:118
genesis::population::BedReader
Reader for BED (Browser Extensible Data) files.
Definition: bed_reader.hpp:91
genesis::population::BedReader::Feature::name
std::string name
Definition: bed_reader.hpp:111
genesis::population::BedReader::read_as_genome_region_list
GenomeRegionList read_as_genome_region_list(std::shared_ptr< utils::BaseInputSource > source, bool merge=false) const
Read a BED input source, and return its content as a GenomeRegionList.
Definition: bed_reader.cpp:69
genesis::population::BedReader::Feature::item_rgb
std::string item_rgb
Definition: bed_reader.hpp:116