A library for working with phylogenetic and population genetic data.
v0.32.0
kmer_scanner.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_KMER_KMER_SCANNER_H_
2 #define GENESIS_SEQUENCE_KMER_KMER_SCANNER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
35 
36 #include <array>
37 #include <cassert>
38 #include <climits>
39 #include <cstdint>
40 #include <limits>
41 #include <stdexcept>
42 #include <string>
43 
44 namespace genesis {
45 namespace sequence {
46 
47 // =================================================================================================
48 // Kmer Scanner
49 // =================================================================================================
50 
54 template<typename Tag>
56 {
57 public:
58 
59  // -------------------------------------------------------------------------
60  // Member Types
61  // -------------------------------------------------------------------------
62 
65  using pointer = value_type*;
67  using iterator_category = std::input_iterator_tag;
68 
69  // ======================================================================================
70  // Internal Iterator
71  // ======================================================================================
72 
73 public:
74 
75  class Iterator
76  {
77  // -------------------------------------------------------------------------
78  // Typedefs and Enums
79  // -------------------------------------------------------------------------
80 
81  public:
82 
85  using pointer = value_type const*;
86  using reference = value_type const&;
87  using iterator_category = std::input_iterator_tag;
88 
89  // -------------------------------------------------------------------------
90  // Constructors and Rule of Five
91  // -------------------------------------------------------------------------
92 
93  private:
94 
95  Iterator() = default;
96 
97  Iterator( KmerScanner const* parent ) // Private constructor used by KmerScanner::begin(); stores a raw, non-owning pointer to the scanner.
98  : parent_( parent )
99  {
100  // Safeguard: a null parent is the end-of-input state, so there is nothing to start.
101  if( ! parent_ ) {
102  return;
103  }
104 
105  // Start streaming the data by advancing once via increment_().
106  increment_();
107  }
108 
109  public:
110 
111  ~Iterator() = default;
112 
113  Iterator( self_type const& ) = default;
114  Iterator( self_type&& ) = default;
115 
116  Iterator& operator= ( self_type const& ) = default;
117  Iterator& operator= ( self_type&& ) = default;
118 
119  friend KmerScanner;
120 
121  // -------------------------------------------------------------------------
122  // Iterator Accessors
123  // -------------------------------------------------------------------------
124 
125  value_type const* operator->() const // Read-only member access: returns the address of the stored kmer_.
126  {
127  return &kmer_;
128  }
129 
131  {
132  return &kmer_;
133  }
134 
135  value_type const& operator*() const // Read-only dereference: returns a reference to the stored kmer_.
136  {
137  return kmer_;
138  }
139 
141  {
142  return kmer_;
143  }
144 
145  // -------------------------------------------------------------------------
146  // Iteration
147  // -------------------------------------------------------------------------
148 
150  {
151  increment_();
152  return *this;
153  }
154 
164  bool operator==( self_type const& it ) const // Compare two iterators for equality.
165  {
166  return parent_ == it.parent_; // Only parent_ is compared, not position_ or kmer_; an iterator equals end() once its parent_ is null.
167  }
168 
169  bool operator!=( self_type const& it ) const // Compare two iterators for inequality; exact negation of operator==.
170  {
171  return !(*this == it);
172  }
173 
174  // -------------------------------------------------------------------------
175  // Internal Members
176  // -------------------------------------------------------------------------
177 
178  private:
179 
180  // ---------------------------------------------
181  // Increment and Processing Samples
182  // ---------------------------------------------
183 
184  void increment_() // Called by the parent-taking constructor and by the pre-increment body.
185  {
186  // Increment or restart. The body is still empty, so a call currently leaves the iterator state unchanged.
187  }
188 
189  void init_kmer_from_position_() // Unfinished helper, presumably meant to build kmer_ from the input at position_; not called from any code visible here.
190  {
191  assert( parent_ ); // Must not be called on an end iterator (null parent).
192 
193  while( true ) {
194  if( position_ + Kmer<Tag>::k() > parent_->input.size() ) { // Fewer than k characters remain from position_. NOTE(review): the scanner member is declared as input_, not input - confirm.
195  parent_ = nullptr; // A null parent marks the end of the input.
196  return;
197  }
198  if( position_ + len == Kmer<Tag>::k() ) { // NOTE(review): len is not declared anywhere in the visible code - confirm intent.
199  break;
200  }
201 
202  encoding.char_to_rank() // NOTE(review): incomplete statement (no arguments, no semicolon) and no visible encoding object; nothing in this loop advances position_ yet.
203  }
204  }
205 
206  // -------------------------------------------------------------------------
207  // Data Members
208  // -------------------------------------------------------------------------
209 
210  private:
211 
212  // Parent. If null, this indicates the end of the input and that we are done iterating.
213  KmerScanner const* parent_ = nullptr;
214 
215  Kmer<Tag>::WordType kmer_;
216  size_t position_ = 0;
217  };
218 
219  // ======================================================================================
220  // Main Class
221  // ======================================================================================
222 
223  // -------------------------------------------------------------------------
224  // Constructors and Rule of Five
225  // -------------------------------------------------------------------------
226 
227  KmerScanner( std::string const& input ) // Create a scanner that stores its own copy of input; does not change k.
228  : input_( input )
229  {}
230 
231  KmerScanner( std::string&& input ) // Create a scanner that takes over input by moving it into input_; does not change k.
232  : input_( std::move( input ))
233  {}
234 
235  KmerScanner( std::string const& input, uint8_t k ) // Create a scanner that stores its own copy of input, and set k.
236  : input_( input )
237  {
238  Kmer<Tag>::set_k( k ); // Static setter: applies to all Kmers of this Tag, not only to this scanner.
239  }
240 
241  KmerScanner( std::string&& input, uint8_t k ) // Create a scanner that takes over input by moving it into input_, and set k.
242  : input_( std::move( input ))
243  {
244  Kmer<Tag>::set_k( k ); // Static setter: applies to all Kmers of this Tag, not only to this scanner.
245  }
246 
247  ~KmerScanner() = default;
248 
249  KmerScanner( self_type const& ) = default;
250  KmerScanner( self_type&& ) = default;
251 
252  self_type& operator= ( self_type const& ) = default;
253  self_type& operator= ( self_type&& ) = default;
254 
255  // -------------------------------------------------------------------------
256  // Iteration
257  // -------------------------------------------------------------------------
258 
259  Iterator begin() const // Iterator bound to this scanner. It keeps a raw pointer to *this, so the scanner must outlive it.
260  {
261  return Iterator( this );
262  }
263 
264  Iterator end() const // Past-the-end iterator: default-constructed, so its parent_ is null.
265  {
266  return Iterator();
267  }
268 
269  // -------------------------------------------------------------------------
270  // Data Members
271  // -------------------------------------------------------------------------
272 
273 private:
274 
275  std::string input_;
276 
277 };
278 
279 } // namespace sequence
280 } // namespace genesis
281 
282 #endif // include guard
genesis::sequence::KmerScanner::Iterator
Definition: kmer_scanner.hpp:75
kmer.hpp
genesis::sequence::KmerScanner::~KmerScanner
~KmerScanner()=default
genesis::sequence::KmerScanner::Iterator::KmerScanner
friend KmerScanner
Definition: kmer_scanner.hpp:119
genesis::sequence::KmerScanner::Iterator::operator=
Iterator & operator=(self_type const &)=default
genesis::sequence::Kmer
Kmer class template for representing k-mers of various sizes, currently up to k=32.
Definition: kmer.hpp:69
genesis::sequence::KmerScanner::KmerScanner
KmerScanner(std::string const &input)
Definition: kmer_scanner.hpp:227
genesis::sequence::KmerScanner::end
Iterator end() const
Definition: kmer_scanner.hpp:264
genesis::sequence::KmerScanner::operator=
self_type & operator=(self_type const &)=default
genesis::sequence::KmerScanner::begin
Iterator begin() const
Definition: kmer_scanner.hpp:259
genesis::sequence::KmerScanner::KmerScanner
KmerScanner(std::string const &input, uint8_t k)
Definition: kmer_scanner.hpp:235
genesis::sequence::KmerScanner::Iterator::operator*
value_type const & operator*() const
Definition: kmer_scanner.hpp:135
genesis::sequence::KmerScanner::Iterator::operator++
self_type & operator++()
Definition: kmer_scanner.hpp:149
genesis::sequence::Kmer::k
static uint8_t k()
Definition: kmer.hpp:117
genesis::sequence::KmerScanner::Iterator::operator!=
bool operator!=(self_type const &it) const
Definition: kmer_scanner.hpp:169
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::KmerScanner::self_type
KmerScanner self_type
Definition: kmer_scanner.hpp:63
genesis::sequence::KmerScanner::Iterator::operator==
bool operator==(self_type const &it) const
Compare two iterators for equality.
Definition: kmer_scanner.hpp:164
genesis::sequence::KmerScanner::KmerScanner
KmerScanner(std::string &&input)
Definition: kmer_scanner.hpp:231
genesis::sequence::KmerScanner::KmerScanner
KmerScanner(std::string &&input, uint8_t k)
Definition: kmer_scanner.hpp:241
genesis::sequence::KmerScanner::iterator_category
std::input_iterator_tag iterator_category
Definition: kmer_scanner.hpp:67
genesis::sequence::KmerScanner::Iterator::iterator_category
std::input_iterator_tag iterator_category
Definition: kmer_scanner.hpp:87
genesis::sequence::Kmer::WordType
uint64_t WordType
Underlying integer type used to store the k-mer.
Definition: kmer.hpp:80
genesis::sequence::KmerScanner::Iterator::operator*
value_type & operator*()
Definition: kmer_scanner.hpp:140
genesis::sequence::Kmer::set_k
static void set_k(uint8_t k)
Set the value of k for all Kmers of the given Tag.
Definition: kmer.hpp:129
genesis::sequence::KmerScanner::Iterator::reference
value_type const & reference
Definition: kmer_scanner.hpp:86
genesis::sequence::KmerScanner::Iterator::pointer
value_type const * pointer
Definition: kmer_scanner.hpp:85
genesis::sequence::KmerScanner::Iterator::~Iterator
~Iterator()=default
genesis::sequence::KmerScanner::Iterator::operator->
value_type * operator->()
Definition: kmer_scanner.hpp:130
genesis::sequence::KmerScanner::Iterator::operator->
value_type const * operator->() const
Definition: kmer_scanner.hpp:125
genesis::sequence::KmerScanner
Definition: kmer_scanner.hpp:55