|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file. 1 #ifndef GENESIS_POPULATION_GENOME_REGION_LIST_H_
2 #define GENESIS_POPULATION_GENOME_REGION_LIST_H_
46 namespace population {
139 void add( std::string
const& chromosome )
142 add( chromosome, 0, 0 );
155 std::string
const& chromosome,
161 if( chromosome.empty() ) {
162 throw std::invalid_argument(
163 "Cannot add region to GenomeRegionList with empty chromosome name, "
164 "as this denotes an invalid chromosome."
172 throw std::invalid_argument(
173 "Cannot add region to GenomeRegionList with start == " +
177 if(( start == 0 ) ^ ( end == 0 )) {
178 throw std::invalid_argument(
179 "Cannot add region to GenomeRegionList with either start == 0 or end == 0, "
180 "but not both, as we use 1-base indexing, with both being 0 being interpreted "
181 "as the special case of denoting the whole chromosome. "
182 "Hence either both start and end have to be 0, or neither."
190 regions_[ chromosome ].insert_overlap({ start, end });
192 regions_[ chromosome ].insert({ start, end });
222 throw std::invalid_argument(
223 "Cannot use two GenomeLocus instances with different chromosomes ( start == \"" +
260 for(
auto const& chr : other.regions_ ) {
261 for(
auto const& interval : chr.second ) {
262 add( chr.first, interval.low(), interval.high(), overlap );
282 void clear( std::string
const& chromosome )
284 if( regions_.count( chromosome ) == 0 ) {
285 throw std::invalid_argument(
286 "Chromosome name \"" + chromosome +
"\" not found in GenomeRegionList"
289 regions_.erase( chromosome );
303 auto const it = regions_.find( chromosome );
304 if( it == regions_.end() ) {
307 auto const& chrom_tree = it->second;
311 if( chrom_tree.overlap_find( 0 ) != chrom_tree.end() ) {
316 return chrom_tree.overlap_find( position ) != chrom_tree.end();
326 auto const it = regions_.find( chromosome );
327 if( it == regions_.end() ) {
330 auto const& chrom_tree = it->second;
331 return chrom_tree.overlap_find( 0 ) != chrom_tree.end();
348 std::string
const& chromosome,
350 bool whole_chromosome =
false
353 auto const it = regions_.find( chromosome );
354 if( it == regions_.end() ) {
357 auto const& chrom_tree = it->second;
363 assert( it->interval().within( position ));
369 if( whole_chromosome && ( chrom_tree.overlap_find( 0 ) != chrom_tree.end() )) {
406 return regions_.empty();
414 return regions_.size();
422 std::vector<std::string> result;
423 for(
auto const& p : regions_ ) {
424 result.push_back( p.first );
434 return regions_.count( chromosome ) > 0;
443 return regions_.at( chromosome );
457 return regions_.at( chromosome );
465 if( regions_.count( chromosome ) == 0 ) {
466 throw std::invalid_argument(
467 "Chromosome name \"" + chromosome +
"\" not found in GenomeRegionList"
470 return regions_.at( chromosome ).size();
479 for(
auto const& reg : regions_ ) {
480 cnt += reg.second.size();
556 std::map<std::string, tree_type> regions_;
565 #endif // include guard
typename tree_type::const_iterator const_iterator
std::vector< std::string > chromosome_names() const
Get a list of all stored chromosome names.
std::map< std::string, tree_type > const & chromosome_map() const
Access the underlying container directly.
void clear(std::string const &chromosome)
Remove the regions of the specified chromosome.
size_t region_count(std::string const &chromosome) const
Return the number of regions stored for the specified chromosome.
void add(std::string const &chromosome)
Add a whole chromosome to the list, so that all its positions are considered to be covered.
bool empty() const
Return whether the list is empty, that is, whether there are no chromosomes with regions stored.
void add(GenomeRegionList const &other, bool overlap=false)
Add a complete GenomeRegionList to this list.
size_t chromosome_count() const
Return the number of chromosomes for which there are regions stored.
A single locus, that is, a position (or coordinate) on a chromosome.
bool has_chromosome(std::string const &chromosome) const
Return whether a chromosome is stored.
List of regions in a genome, for each chromosome.
std::string to_string(GenomeLocus const &locus)
EmptyGenomeData data_type
size_t cover_count(std::string const &chromosome, numerical_type position, bool whole_chromosome=false) const
Return the number of regions (intervals) that overlap with a given position on a chromosome.
bool is_covered(std::string const &chromosome) const
Return whether a whole chromosome is covered.
std::map< std::string, tree_type > & chromosome_map()
Access the underlying container directly.
Helper type to define a closed [] Interval.
void add(std::string const &chromosome, numerical_type start, numerical_type end, bool overlap=false)
Add a region to the list, given its chromosome, and start and end positions.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
bool is_covered(std::string const &chromosome, numerical_type position) const
Return whether a given position on a chromosome is part of any of the regions (intervals) that are st...
GenomeRegionList & operator=(GenomeRegionList const &)=default
void add(GenomeLocus const &start, GenomeLocus const &end, bool overlap=false)
Add an interval between two GenomeLoci on the same chromosome.
tree_type const & chromosome_regions(std::string const &chromosome) const
For a given chromosome, return the IntervalTree that stores its regions.
IntervalTreeIterator< node_type, false > iterator
void add(GenomeLocus const &locus, bool overlap=false)
Add a single GenomeLocus, that is, an interval covering one position on a chromosome.
Helper struct to define a default empty data for the classes GenomeLocus, GenomeRegion,...
GenomeRegionList()=default
IntervalTreeIterator< node_type, true > const_iterator
void add(GenomeRegion const &region, bool overlap=false)
Add a GenomeRegion to the list.
tree_type & chromosome_regions(std::string const &chromosome)
For a given chromosome, return the IntervalTree that stores its regions.
A region (between two positions) on a chromosome.
void clear()
Remove all stored regions from all chromosomes.
~GenomeRegionList()=default
Interval tree that enables storing and querying intervals, each containing some data.
size_t total_region_count() const
Return the number of regions stored in total, across all chromosomes.
typename tree_type::iterator iterator