A library for working with phylogenetic and population genetic data.
v0.32.0
microvariant_scanner.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_KMER_MICROVARIANT_SCANNER_H_
2 #define GENESIS_SEQUENCE_KMER_MICROVARIANT_SCANNER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
36 
#include <array>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>
44 
45 namespace genesis {
46 namespace sequence {
47 
48 // =================================================================================================
49 // Kmer Microvariant Scanner
50 // =================================================================================================
51 
55 template<typename Tag>
57 {
58 public:
59 
60  // -----------------------------------------------------
61  // Typedefs
62  // -----------------------------------------------------
63 
64  using iterator_category = std::forward_iterator_tag;
67 
68  // -----------------------------------------------------
69  // Constructors and Rule of Five
70  // -----------------------------------------------------
71 
72  MicrovariantScanner() // = default;
73  : kmer_( 0 )
74  {}
75 
76  explicit MicrovariantScanner( Kmer<Tag> const& kmer )
77  : kmer_( kmer )
78  , pos_( 0 )
79  , cnt_( 0 )
80  {
81  // Iterate to the first microvariant.
82  operator++();
83  }
84 
85  ~MicrovariantScanner() = default;
86 
87  MicrovariantScanner( MicrovariantScanner const& ) = default;
89 
92 
93  // -----------------------------------------------------
94  // Operators
95  // -----------------------------------------------------
96 
98  {
99  return kmer_;
100  }
101 
103  {
104  return &kmer_;
105  }
106 
108  {
109  // Check assumptions of this function.
110  using WordType = typename Kmer<Tag>::WordType;
111  static_assert( Kmer<Tag>::BITS_PER_CHAR == 2, "Kmer<Tag>::BITS_PER_CHAR != 2" );
112  static_assert( std::is_same<WordType, std::uint64_t>::value, "Kmer::WordType != uint64_t" );
113 
114  // We use four xor's at the current position to cycle through the variants:
115  // The first thee are the substitutions, the last one then restores the original value.
116  // For this, we use the xor order 01 11 01 11.
117  //
118  // The table shows that this works for all four possible values.
119  //
120  // | 00 01 10 11
121  // ---------------------
122  // 0 | 01 | 01 00 11 10
123  // 1 | 11 | 10 11 00 01
124  // 2 | 01 | 11 10 01 00
125  // 3 | 11 | 00 01 10 11
126 
127  // Helper function that cycles the value at a position, using the above table.
128  auto cycle_ = [&]( size_t pos, size_t& cnt )
129  {
130  // Move the needed xor value to the position in the word and apply it.
131  WordType const xor_val = ( cnt % 2 == 0 ? 0x1 : 0x3 );
132  kmer_.value() ^= ( xor_val << ( 2 * pos ));
133  ++cnt;
134  };
135 
136  // Do at least one cycle at the current position.
137  cycle_( pos_, cnt_ );
138 
139  // Check if we already did all three possible substitutions at the current position.
140  if( cnt_ == 4 ) {
141 
142  // If this is not the last possible position, move to the next one.
143  // Otherwise, we are done, indicated by setting everything to the default.
144  assert( kmer_.k() > 0 );
145  if( pos_ < kmer_.k() - 1 ) {
146  ++pos_;
147  cnt_ = 0;
148  cycle_( pos_, cnt_ );
149  } else {
150  pos_ = std::numeric_limits<size_t>::max();
151  cnt_ = std::numeric_limits<size_t>::max();
152  }
153  }
154 
155  return *this;
156  }
157 
158  // self_type operator ++ (int)
159  // {
160  // self_type tmp = *this;
161  // ++(*this);
162  // return tmp;
163  // }
164 
165  bool operator == ( self_type const& other ) const
166  {
167  return ( pos_ == other.pos_ ) && ( cnt_ == other.cnt_ );
168  }
169 
170  bool operator != ( self_type const& other ) const
171  {
172  return !( other == *this );
173  }
174 
175  // -----------------------------------------------------
176  // Members
177  // -----------------------------------------------------
178 
182  size_t position() const
183  {
184  return pos_;
185  }
186 
190  Kmer<Tag> const& kmer() const
191  {
192  return kmer_;
193  }
194 
195 private:
196 
    // The current k-mer. While iterating, it differs from the original k-mer
    // by a substitution at exactly one position, namely pos_.
198  Kmer<Tag> kmer_;
199 
    // The position where currently a value is substituted, and the counter for the xor cycles
    // already applied at that position (three substitutions, then restoring the original).
    // Both are at their maximum value for a past-the-end scanner.
202  size_t pos_ = std::numeric_limits<size_t>::max();
203  size_t cnt_ = std::numeric_limits<size_t>::max();
204 
205 };
206 
207 // =================================================================================================
208 // Range Wrapper
209 // =================================================================================================
210 
211 template<typename Tag>
213 {
214  return {
215  MicrovariantScanner<Tag>( kmer ),
217  };
218 }
219 
220 } // namespace sequence
221 } // namespace genesis
222 
223 #endif // include guard
genesis::sequence::MicrovariantScanner::operator=
MicrovariantScanner & operator=(MicrovariantScanner const &)=default
genesis::sequence::MicrovariantScanner::MicrovariantScanner
MicrovariantScanner()
Definition: microvariant_scanner.hpp:72
genesis::sequence::MicrovariantScanner::operator++
self_type & operator++()
Definition: microvariant_scanner.hpp:107
kmer.hpp
genesis::sequence::MicrovariantScanner::iterator_category
std::forward_iterator_tag iterator_category
Definition: microvariant_scanner.hpp:64
genesis::sequence::MicrovariantScanner::kmer
Kmer< Tag > const & kmer() const
Get the current k-mer microvariant.
Definition: microvariant_scanner.hpp:190
genesis::sequence::MicrovariantScanner::operator!=
bool operator!=(self_type const &other) const
Definition: microvariant_scanner.hpp:170
genesis::sequence::MicrovariantScanner::position
size_t position() const
Get the position at which a substitution is currently applied.
Definition: microvariant_scanner.hpp:182
genesis::sequence::Kmer
Kmer class template for representing k-mers of various sizes, currently up to k = 32.
Definition: kmer.hpp:69
genesis::sequence::MicrovariantScanner::operator==
bool operator==(self_type const &other) const
Definition: microvariant_scanner.hpp:165
genesis::sequence::MicrovariantScanner::~MicrovariantScanner
~MicrovariantScanner()=default
genesis::sequence::MicrovariantScanner
Definition: microvariant_scanner.hpp:56
genesis::sequence::MicrovariantScanner::operator*
value_type const & operator*()
Definition: microvariant_scanner.hpp:97
genesis::sequence::iterate_microvariants
utils::Range< MicrovariantScanner< Tag > > iterate_microvariants(Kmer< Tag > const &kmer)
Definition: microvariant_scanner.hpp:212
range.hpp
genesis::sequence::MicrovariantScanner::operator->
value_type const * operator->()
Definition: microvariant_scanner.hpp:102
genesis::sequence::MicrovariantScanner::MicrovariantScanner
MicrovariantScanner(Kmer< Tag > const &kmer)
Definition: microvariant_scanner.hpp:76
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::Kmer::WordType
uint64_t WordType
Underlying integer type used to store the k-mer.
Definition: kmer.hpp:80