37 #include <unordered_set>
40 namespace population {
47 std::string
const& chromosome,
52 if( chromosome.empty() ) {
53 throw std::invalid_argument(
54 "Cannot add region to GenomeLocusSet with empty chromosome name, "
55 "as this denotes an invalid chromosome."
63 throw std::invalid_argument(
64 "Cannot add region to GenomeLocusSet with start == " +
68 if(( start == 0 ) ^ (
end == 0 )) {
69 throw std::invalid_argument(
70 "Cannot add region to GenomeLocusSet with either start == 0 or end == 0, "
71 "but not both, as we use 1-base indexing, with both being 0 being interpreted "
72 "as the special case of denoting the whole chromosome. "
73 "Hence either both start and end have to be 0, or neither."
76 assert( start <=
end );
80 auto& bv = locus_map_[ chromosome ];
83 }
else if( bv.size() <
end + 1 ) {
84 if( bv.size() * 2 <
end + 1 ) {
90 assert( bv.size() >=
end + 1 );
93 bv.set( start,
end + 1 );
107 if( chromosome.empty() ) {
108 throw std::invalid_argument(
109 "Cannot add region to GenomeLocusSet with empty chromosome name, "
110 "as this denotes an invalid chromosome."
113 if( locus_map_.count( chromosome ) > 0 ) {
114 throw std::invalid_argument(
115 "Cannot add region via chromosome and Bitvector, as chromosome \"" + chromosome +
116 "\" is already present in the GenomeLocusSet."
119 if( values.empty() ) {
120 throw std::invalid_argument(
121 "Cannot add region via chromosome and Bitvector, as given Bitvector is empty."
124 assert( values.size() > 0 );
125 if( values.size() == 1 && !values.get(0) ) {
126 throw std::invalid_argument(
127 "Cannot add region via chromosome and Bitvector, as given Bitvector has [0]==false."
132 locus_map_[ chromosome ] = values;
149 std::unordered_set<std::string> chrs_to_delete;
150 for(
auto const& chr : lhs.locus_map_ ) {
151 assert( chrs_to_delete.count( chr.first ) == 0 );
152 chrs_to_delete.insert( chr.first );
156 for(
auto const& chr : rhs.locus_map_ ) {
159 if( ! chrs_to_delete.count( chr.first ) ) {
160 assert( ! lhs.has_chromosome( chr.first ));
163 assert( lhs.has_chromosome( chr.first ));
168 assert( lhs.locus_map_.at( chr.first ).size() > 0 );
169 assert( rhs.locus_map_.at( chr.first ).size() > 0 );
172 auto& lhs_bits = lhs.locus_map_.at( chr.first );
173 auto const& rhs_bits = rhs.locus_map_.at( chr.first );
174 auto const lhs_bit_0 = lhs_bits.get( 0 );
175 auto const rhs_bit_0 = rhs_bits.get( 0 );
176 assert( &rhs_bits == &chr.second );
179 if( lhs_bit_0 && rhs_bit_0 ) {
183 }
else if( lhs_bit_0 && !rhs_bit_0 ) {
186 assert( lhs_bits.size() > 0 );
187 assert( !lhs_bits.get(0) );
188 }
else if( !lhs_bit_0 && rhs_bit_0 ) {
191 assert( lhs_bits.size() > 0 );
192 assert( !lhs_bits.get(0) );
194 assert( !lhs_bit_0 && !rhs_bit_0 );
198 lhs_bits =
bitwise_and( lhs_bits, rhs_bits,
false );
199 assert( lhs_bits.size() > 0 );
200 assert( !lhs_bits.get(0) );
207 if( lhs_bits.count() > 0 ) {
208 chrs_to_delete.erase( chr.first );
213 for(
auto const& chr : chrs_to_delete ) {
214 assert( lhs.has_chromosome( chr ));
215 lhs.locus_map_.erase( chr );
229 for(
auto const& chr : rhs.locus_map_ ) {
231 auto const& rhs_bits = rhs.locus_map_.at( chr.first );
232 assert( &rhs_bits == &chr.second );
234 if( lhs.has_chromosome( chr.first )) {
235 assert( lhs.locus_map_.count( chr.first ) > 0 );
236 auto& lhs_bits = lhs.locus_map_.at( chr.first );
237 if( lhs_bits.get(0) || rhs_bits.get(0) ) {
245 lhs_bits =
bitwise_or( lhs_bits, rhs_bits,
true );
251 assert( lhs.locus_map_.count( chr.first ) == 0 );
252 auto& lhs_bits = lhs.locus_map_[ chr.first ];
253 if( rhs_bits.get(0) ) {
266 for(
auto& chr_bv : locus_map_ ) {
268 if( ! sequence_dict.
contains( chr_bv.first )) {
269 throw std::runtime_error(
270 "Cannot invert Genome Locus Set for chromosome \"" + chr_bv.first +
271 "\", as the given Sequence Dict does not contain an entry for that chromosome"
276 auto const& seq_entry = sequence_dict.
get( chr_bv.first );
277 assert( chr_bv.second.size() > 0 );
278 if( chr_bv.second.size() - 1 > seq_entry.size() ) {
279 throw std::runtime_error(
280 "Cannot invert Genome Locus Set for chromosome \"" + chr_bv.first +
281 "\", as the given Sequence Dict indicates its length to be " +
292 if( ! chr_bv.second.get(0) ) {
293 new_bv =
Bitvector( seq_entry.size() + 1, chr_bv.second );
295 new_bv.set( 0,
false );
296 assert( new_bv.size() >= chr_bv.second.size() );
300 locus_map_[ chr_bv.first ] = new_bv;