|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_GENOME_LOCUS_SET_H_
2 #define GENESIS_POPULATION_GENOME_LOCUS_SET_H_
39 #include <unordered_map>
49 namespace population {
85 using value_type = std::pair<std::string const, utils::Bitvector>;
90 using iterator =
typename std::unordered_map<std::string, utils::Bitvector>::iterator;
91 using const_iterator =
typename std::unordered_map<std::string, utils::Bitvector>::const_iterator;
96 static const size_t npos = std::numeric_limits<size_t>::max();
99 "Differing definitions of GenomeLocusSet::npos and Bitvector::npos"
127 void add( std::string
const& chromosome )
130 add( chromosome, 0, 0 );
136 void add( std::string
const& chromosome,
size_t position )
138 add( chromosome, position, position );
151 std::string
const& chromosome,
174 throw std::invalid_argument(
175 "Cannot use two GenomeLocus instances with different chromosomes ( start == \"" +
176 start.
chromosome +
"\", end == \"" +
end.chromosome +
"\") as an entry in a "
207 for(
auto const& interval : chr.second ) {
208 add( chr.first, interval.low(), interval.high() );
245 void clear( std::string
const& chromosome )
247 if( locus_map_.count( chromosome ) == 0 ) {
248 throw std::invalid_argument(
249 "Chromosome name \"" + chromosome +
"\" not found in GenomeLocusSet"
252 locus_map_.erase( chromosome );
299 if( bitvector.
empty() ) {
300 throw std::invalid_argument(
301 "GenomeLocusSet::is_covered( Bitvector const&, size_t ) called with empty Bitvector"
311 if( bitvector.
get( 0 ) ) {
317 if( position >= bitvector.
size() ) {
320 return bitvector.
get( position );
342 bool is_covered( std::string
const& chromosome,
size_t position )
const
345 auto const it = locus_map_.find( chromosome );
346 if( it == locus_map_.end() ) {
349 auto const& bv = it->second;
350 assert( ! bv.empty() );
362 auto const it = locus_map_.find( chromosome );
363 if( it == locus_map_.end() ) {
366 auto const& bv = it->second;
367 assert( ! bv.empty() );
380 auto const it = locus_map_.find( chromosome );
381 if( it == locus_map_.end() ) {
384 auto const& bv = it->second;
385 assert( ! bv.empty() );
406 if( bitvector.
empty() ) {
407 throw std::invalid_argument(
408 "GenomeLocusSet::next_covered() called with empty Bitvector"
411 if( start_position == 0 ) {
412 throw std::invalid_argument(
413 "GenomeLocusSet::next_covered() called with start_position==0"
419 if( bitvector.
get( 0 ) ) {
420 return start_position;
449 size_t next_covered( std::string
const& chromosome,
size_t start_position )
const
451 auto const it = locus_map_.find( chromosome );
452 if( it == locus_map_.end() ) {
455 auto const& bv = it->second;
456 assert( ! bv.empty() );
469 return locus_map_.begin();
477 return locus_map_.end();
489 return locus_map_.empty();
497 return locus_map_.size();
505 std::vector<std::string> result;
506 for(
auto const& p : locus_map_ ) {
507 result.push_back( p.first );
517 return locus_map_.count( chromosome ) > 0;
527 return locus_map_.find( chromosome );
536 return locus_map_.at( chromosome );
549 return locus_map_.at( chromosome );
561 std::unordered_map<std::string, utils::Bitvector> locus_map_;
568 #endif // include guard
void add(std::string const &chromosome, size_t position)
Add a single locus (position, coordinate) to the list.
void add(GenomeRegion const &region)
Add a GenomeRegion to the list.
std::map< std::string, tree_type > const & chromosome_map() const
Access the underlying container directly.
utils::Bitvector const & chromosome_positions(std::string const &chromosome) const
For a given chromosome, return the Bitvector that stores its positions.
static bool is_covered(utils::Bitvector const &bitvector, size_t position)
Return whether a given position on the provided bitvector is covered.
void set_union(GenomeLocusSet const &rhs)
Compute the union with another GenomeLocusSet rhs.
void clear()
Remove all stored regions from all chromosomes.
std::vector< std::string > chromosome_names() const
Get a list of all stored chromosome names.
bool get(size_t index) const
Return the value of a single bit, with boundary check.
bool is_covered(std::string const &chromosome) const
Return whether a whole chromosome is covered.
const_iterator end() const
Return an iterator to the end of the map of chromosome names to Bitvectors.
Store dictionary/index data on sequence files, such as data coming from .fai or .dict files.
std::pair< std::string const, utils::Bitvector > value_type
bool empty() const
Return whether there are chromosomes with positions stored.
size_t find_next_set(size_t start) const
Return the index of the next position in the Bitvector that is set.
bool has_chromosome(std::string const &chromosome) const
Return whether a chromosome is stored.
List of positions/coordinates in a genome, for each chromosome.
size_t chromosome_count() const
Return the number of chromosomes for which there are positions stored.
A single locus, that is, a position (or coordinate) on a chromosome.
typename std::unordered_map< std::string, utils::Bitvector >::iterator iterator
static size_t next_covered(const_iterator const &it, size_t start_position)
Return the next position (including the start_position) that is covered.
void add(GenomeRegionList const &list)
Add a complete GenomeRegionList to this list.
const_iterator begin() const
Return an iterator to the beginning of the map of chromosome names to Bitvectors.
List of regions in a genome, for each chromosome.
value_type const & const_reference
void clear(std::string const &chromosome)
Remove the regions of the specified chromosome.
static const size_t npos
Position value to indicate that next_covered did not find any covered position.
void add(GenomeLocus const &start, GenomeLocus const &end)
Add an interval between two GenomeLoci on the same chromosome.
void add(GenomeLocus const &locus)
Add a single GenomeLocus, that is, an interval covering one position on a chromosome.
void invert(sequence::SequenceDict const &sequence_dict)
Invert all chromosome regions.
bool is_covered(std::string const &chromosome, size_t position) const
Return whether a given position on a chromosome is part of any of the regions stored.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
static size_t next_covered(utils::Bitvector const &bitvector, size_t start_position)
Return the next position (including the start_position) that is covered.
bool empty() const
Return whether the Bitvector is empty, that is, has size() == 0.
size_t next_covered(std::string const &chromosome, size_t start_position) const
Return the next position (including the start_position) that is covered.
void set_intersect(GenomeLocusSet const &rhs)
Compute the intersection with another GenomeLocusSet rhs.
A region (between two positions) on a chromosome.
const_iterator find(std::string const &chromosome)
Find a chromosome in the map.
utils::Bitvector & chromosome_positions(std::string const &chromosome)
For a given chromosome, return the Bitvector that stores its positions.
GenomeLocusSet & operator=(GenomeLocusSet const &)=default
void add(std::string const &chromosome)
Add a whole chromosome to the list, so that all its positions are considered to be covered.
size_t size() const
Return the size (number of bits) of this Bitvector.
static const size_t npos
Value to indicate that find_next_set() did not find any set bits.
~GenomeLocusSet()=default
bool any_covered(std::string const &chromosome) const
Return whether the given chromosome has any loci covered.
static bool is_covered(const_iterator const &it, size_t position)
Return whether a given position on the provided iterator is covered.
typename std::unordered_map< std::string, utils::Bitvector >::const_iterator const_iterator