A library for working with phylogenetic and population genetic data.
v0.32.0
genome_region.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_GENOME_REGION_H_
2 #define GENESIS_POPULATION_GENOME_REGION_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <iosfwd>
35 #include <stdexcept>
36 #include <string>
37 #include <type_traits>
38 #include <vector>
39 
40 namespace genesis {
41 namespace population {
42 
43 // =================================================================================================
44 // Genome Region
45 // =================================================================================================
46 
71 {
72 public:
73 
// Name of the chromosome; an empty string together with zero positions is treated as an empty region.
74  std::string chromosome;
// Start position of the region; 0 is used as the "not set" value.
// NOTE(review): presumably 1-based and inclusive, as described for the commented-out variant - confirm.
75  size_t start = 0;
// End position of the region; 0 is used as the "not set" value, and must not be smaller than start.
76  size_t end = 0;
77 
78  GenomeRegion( std::string const& chr = "", size_t s = 0, size_t e = 0 )
79  : chromosome(chr)
80  , start(s)
81  , end(e)
82  {
83  throw_if_invalid_();
84  }
85 
86  bool empty() const
87  {
88  throw_if_invalid_();
89  return chromosome == "" && start == 0 && end == 0;
90  }
91 
92  bool specified() const
93  {
94  throw_if_invalid_();
95  return chromosome != "" && start != 0 && end != 0;
96  }
97 
98 private:
99 
100  void throw_if_invalid_() const
101  {
102  if( chromosome == "" && ( start > 0 || end > 0 )) {
103  throw std::runtime_error(
104  "Invalid GenomeRegion: Empty chromosome but non-zero start and/or end."
105  );
106  }
107  if( chromosome != "" && (( start > 0 ) ^ ( end > 0 ))) {
108  throw std::runtime_error(
109  "Invalid GenomeRegion: Non-empty chromosome with non-zero start or end."
110  );
111  }
112  if( start > end ) {
113  throw std::invalid_argument(
114  "Invalid GenomeRegion with start == " +
115  std::to_string( start ) + " > end == " + std::to_string( end )
116  );
117  }
118  }
119 };
120 
121 // Alternative version that also has a data field.
122 //
123 // // =================================================================================================
124 // // Genome Region
125 // // =================================================================================================
126 //
127 // /**
128 // * @brief A region (between a start and an end position) on a chromosome.
129 // *
130 // * This can be used to represent a gene, a feature, or just generally a region of interest.
131 // * We use a simple form with a chromosome name, and a start and end position, both inclusive,
132 // * that is, a closed interval. Both start and end can also be identical, in which case they
133 // * denote a single position; see also GenomeLocus for a class representing this.
134 // *
135 // * We use 1-based positions, in order to comply with common chromosome annotation formats.
136 // * Furthermore, we use an empty chromosome string and/or positions 0 as indicators of an empty or
137 // * default constructed locus.
138 // *
139 // * The class takes an extra data type as template parameter, which can be used to store
140 // * arbitrary data for this locus.
141 // *
142 // * @see GenomeLocus
143 // * @see GenomeRegionList
144 // */
145 // template<class DataType = EmptyGenomeData>
146 // class GenomeRegion
147 // {
148 // public:
149 //
150 // // -------------------------------------------------------------------------
151 // // Typedefs
152 // // -------------------------------------------------------------------------
153 //
154 // using data_type = DataType;
155 //
156 // // -------------------------------------------------------------------------
157 // // Constructors and Rule of Five
158 // // -------------------------------------------------------------------------
159 //
160 // /**
161 // * @brief Default construct an empty region.
162 // */
163 // GenomeRegion() = default;
164 //
165 // /**
166 // * @brief Construct a region with a chromosome and a start and end position.
167 // */
168 // GenomeRegion( std::string const& chr, size_t start, size_t end )
169 // : chromosome_( chr )
170 // , start_(start)
171 // , end_(end)
172 // : GenomeRegion( chr, start, end, DataType{} )
173 // {
174 // if( chr.empty() || start == 0 || end == 0 ) {
175 // throw std::invalid_argument(
176 // "Cannot construct GenomeRegion with empty chromosome or position zero."
177 // );
178 // }
179 // if( start > end ) {
180 // throw std::invalid_argument(
181 // "Cannot construct GenomeRegion with start > end."
182 // );
183 // }
184 // }
185 //
186 // /**
187 // * @brief Construct a region with a chromosome and a start and end position,
188 // * and copy the data.
189 // */
190 // GenomeRegion( std::string const& chr, size_t start, size_t end, DataType const& data )
191 // : GenomeRegion( chr, start, end, DataType{ data })
192 // {}
193 //
194 // /**
195 // * @brief Construct a region with a chromosome and a start and end position,
196 // * and move the data.
197 // */
198 // GenomeRegion( std::string const& chr, size_t start, size_t end, DataType&& data )
199 // : chromosome_( chr )
200 // , start_(start)
201 // , end_(end)
202 // , data_( std::move( data ))
203 // {
204 // if( chr.empty() || start == 0 || end == 0 ) {
205 // throw std::invalid_argument(
206 // "Cannot construct GenomeRegion with empty chromosome or position zero."
207 // );
208 // }
209 // }
210 //
211 // /**
212 // * @brief Construct a region from a GenomeLocus of the same data type.
213 // *
214 // * This uses the GenomeLocus::position() for both start and end, and copies the data.
215 // */
216 // template<
217 // class OtherDataType,
218 // typename std::enable_if< std::is_same<DataType, OtherDataType>::value >::type = 0
219 // >
220 // GenomeRegion( GenomeLocus<OtherDataType> const& locus )
221 // : GenomeRegion(
222 // locus.chromosome(), locus.position(), locus.position(), locus.data()
223 // )
224 // {}
225 //
226 // /**
227 // * @brief Construct a region from a GenomeLocus of the same data type.
228 // *
229 // * This uses the GenomeLocus::position() for both start and end, and moves the data.
230 // */
231 // template<
232 // class OtherDataType,
233 // typename std::enable_if< std::is_same<DataType, OtherDataType>::value >::type = 0
234 // >
235 // GenomeRegion( GenomeLocus<OtherDataType>&& locus )
236 // : GenomeRegion(
237 // locus.chromosome(), locus.position(), locus.position(), std::move( locus.data() )
238 // )
239 // {}
240 //
241 // /**
242 // * @brief Construct a region from a GenomeLocus of a different data type.
243 // *
244 // * This uses the GenomeLocus::position() for both start and end,
245 // * and default constructs the data.
246 // */
247 // template<
248 // class OtherDataType,
249 // typename std::enable_if< ! std::is_same<DataType, OtherDataType>::value >::type = 0
250 // >
251 // GenomeRegion( GenomeLocus<OtherDataType> const& locus )
252 // : GenomeRegion( locus.chromosome(), locus.position(), locus.position() )
253 // {}
254 //
255 // ~GenomeRegion() = default;
256 //
257 // GenomeRegion( GenomeRegion const& ) = default;
258 // GenomeRegion( GenomeRegion&& ) = default;
259 //
260 // GenomeRegion& operator= ( GenomeRegion const& ) = default;
261 // GenomeRegion& operator= ( GenomeRegion&& ) = default;
262 //
263 // // -------------------------------------------------------------------------
264 // // Accessors
265 // // -------------------------------------------------------------------------
266 //
267 // std::string const& chromosome() const
268 // {
269 // return chromosome_;
270 // }
271 //
272 // size_t start() const
273 // {
274 // return start_;
275 // }
276 //
277 // size_t end() const
278 // {
279 // return end_;
280 // }
281 //
282 // data_type& data()
283 // {
284 // return data_;
285 // }
286 //
287 // data_type const& data() const
288 // {
289 // return data_;
290 // }
291 //
292 // size_t length() const
293 // {
294 // // Closed interval, so we need to add 1.
295 // return end_ - start_ + 1;
296 // }
297 //
298 // bool empty() const
299 // {
300 // return chromosome_ == "" && start_ == 0 && end_ == 0;
301 // }
302 //
303 // // -------------------------------------------------------------------------
304 // // Operators
305 // // -------------------------------------------------------------------------
306 //
307 // operator std::string() const
308 // {
309 // return to_string();
310 // }
311 //
312 // std::string to_string() const
313 // {
314 // if( start_ == 0 && end_ == 0 ) {
315 // return chromosome_;
316 // } else if( start_ == end_ ) {
317 // return chromosome_ + ":" + std::to_string( start_ );
318 // } else {
319 // return
320 // chromosome_ + ":" +
321 // std::to_string( start_ ) + "-" +
322 // std::to_string( end_ )
323 // ;
324 // }
325 // }
326 //
327 // // -------------------------------------------------------------------------
328 // // Member Variables
329 // // -------------------------------------------------------------------------
330 //
331 // std::string chromosome_;
332 // size_t start_ = 0;
333 // size_t end_ = 0;
334 //
335 // data_type data_;
336 //
337 // };
338 
339 } // namespace population
340 } // namespace genesis
341 
342 #endif // include guard
genesis::population::GenomeRegion::start
size_t start
Definition: genome_region.hpp:75
genesis::population::GenomeRegion::chromosome
std::string chromosome
Definition: genome_region.hpp:74
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::population::GenomeRegion::GenomeRegion
GenomeRegion(std::string const &chr="", size_t s=0, size_t e=0)
Definition: genome_region.hpp:78
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::GenomeRegion
A region (between two positions) on a chromosome.
Definition: genome_region.hpp:70
genesis::population::GenomeRegion::empty
bool empty() const
Definition: genome_region.hpp:86
genesis::population::GenomeRegion::specified
bool specified() const
Definition: genome_region.hpp:92
genesis::population::GenomeRegion::end
size_t end
Definition: genome_region.hpp:76