A library for working with phylogenetic and population genetic data.
v0.27.0
signature_specifications.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_SIGNATURE_SPECIFICATIONS_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_SIGNATURE_SPECIFICATIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <string>
37 #include <vector>
38 
39 namespace genesis {
40 namespace sequence {
41 
42 // =================================================================================================
43 // Forward Declarations
44 // =================================================================================================
45 
46 class Sequence;
47 class SequenceSet;
48 
49 // =================================================================================================
50 // Signature Specifications
51 // =================================================================================================
52 
65 {
66 public:
67 
68  // -------------------------------------------------------------------------
69  // Typedefs and Enums
70  // -------------------------------------------------------------------------
71 
77  {
81  kSkip,
82 
86  kThrow
87  };
88 
92  static size_t const InvalidCharIndex;
93 
94  // -------------------------------------------------------------------------
95  // Constructors and Rule of Five
96  // -------------------------------------------------------------------------
97 
98  SignatureSpecifications() = default;
99  SignatureSpecifications( std::string const& alphabet, size_t k );
100 
104  ~SignatureSpecifications() = default;
105 
110 
115 
120 
125 
126  // -------------------------------------------------------------------------
127  // Accessors
128  // -------------------------------------------------------------------------
129 
130  std::string const& alphabet() const
131  {
132  return alphabet_;
133  }
134 
135  size_t k() const
136  {
137  return k_;
138  }
139 
141  {
142  return unknown_char_behavior_;
143  }
144 
145  // -------------------------------------------------------------------------
146  // Derived Properties
147  // -------------------------------------------------------------------------
148 
152  bool is_nucleic_acids() const
153  {
154  return is_nucleic_acids_;
155  }
156 
163  size_t char_index( char c ) const
164  {
165  return index_lookup_[c];
166  }
167 
171  std::vector<std::string> const& kmer_list() const;
172 
173  size_t kmer_list_size() const;
174 
179  std::vector<size_t> const& kmer_combined_reverse_complement_map() const;
180 
184  std::vector<size_t> const& kmer_reverse_complement_indices() const;
185 
186  std::vector<std::string> const& kmer_reverse_complement_list() const;
187 
188  size_t kmer_reverse_complement_list_size( bool with_palindromes = true ) const;
189 
190  // -------------------------------------------------------------------------
191  // Modifiers
192  // -------------------------------------------------------------------------
193 
195  {
196  unknown_char_behavior_ = value;
197  return *this;
198  }
199 
200  // -------------------------------------------------------------------------
201  // Data Members
202  // -------------------------------------------------------------------------
203 
204 private:
205 
206  // Direct Settings
207  std::string alphabet_;
208  size_t k_ = 0;
209  UnknownCharBehavior unknown_char_behavior_ = UnknownCharBehavior::kSkip;
210 
211  // Induced Settings and Helpers
212  bool is_nucleic_acids_ = false;
213  utils::CharLookup<size_t> index_lookup_;
214 
215  // Cached lookup lists
216  mutable std::vector<std::string> kmer_list_;
217  mutable std::vector<std::string> rev_comp_list_;
218  mutable std::vector<size_t> rev_comp_map_;
219  mutable std::vector<size_t> rev_comp_indices_;
220 };
221 
222 } // namespace sequence
223 } // namespace genesis
224 
225 #endif // include guard
genesis::sequence::SignatureSpecifications::kmer_list_size
size_t kmer_list_size() const
Definition: signature_specifications.cpp:117
genesis::sequence::SignatureSpecifications::kmer_reverse_complement_list
std::vector< std::string > const & kmer_reverse_complement_list() const
Definition: signature_specifications.cpp:226
genesis::sequence::SignatureSpecifications::operator=
SignatureSpecifications & operator=(SignatureSpecifications const &)=default
Default copy assignment.
genesis::sequence::SignatureSpecifications::InvalidCharIndex
static const size_t InvalidCharIndex
Value that is used to indicate an invalid (non-alphabet) char when using char_index().
Definition: signature_specifications.hpp:92
genesis::sequence::SignatureSpecifications::~SignatureSpecifications
~SignatureSpecifications()=default
Default destructor.
genesis::sequence::SignatureSpecifications::unknown_char_behavior
SignatureSpecifications & unknown_char_behavior(UnknownCharBehavior value)
Definition: signature_specifications.hpp:194
genesis::sequence::SignatureSpecifications::unknown_char_behavior
UnknownCharBehavior unknown_char_behavior() const
Definition: signature_specifications.hpp:140
genesis::sequence::SignatureSpecifications::kmer_reverse_complement_list_size
size_t kmer_reverse_complement_list_size(bool with_palindromes=true) const
Definition: signature_specifications.cpp:263
genesis::sequence::SignatureSpecifications::kmer_list
std::vector< std::string > const & kmer_list() const
Return the list of all possible k-mers for the given k and alphabet.
Definition: signature_specifications.cpp:80
char_lookup.hpp
genesis::sequence::SignatureSpecifications::k
size_t k() const
Definition: signature_specifications.hpp:135
genesis::utils::CharLookup< size_t >
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::SignatureSpecifications::UnknownCharBehavior::kThrow
@ kThrow
Throw an exception.
genesis::sequence::SignatureSpecifications::SignatureSpecifications
SignatureSpecifications()=default
genesis::sequence::SignatureSpecifications::is_nucleic_acids
bool is_nucleic_acids() const
Speedup and shortcut to test whether the alphabet() is "ACGT".
Definition: signature_specifications.hpp:152
genesis::sequence::SignatureSpecifications::alphabet
std::string const & alphabet() const
Definition: signature_specifications.hpp:130
genesis::sequence::SignatureSpecifications::kmer_reverse_complement_indices
std::vector< size_t > const & kmer_reverse_complement_indices() const
Get, for each k-mer in kmer_list(), the index of its reverse complement in that list.
Definition: signature_specifications.cpp:174
genesis::sequence::SignatureSpecifications::UnknownCharBehavior
UnknownCharBehavior
List of policies to decide what to do when a char that is not part of the alphabet occurs while count...
Definition: signature_specifications.hpp:76
genesis::sequence::SignatureSpecifications::char_index
size_t char_index(char c) const
Return the index of a char within the alphabet().
Definition: signature_specifications.hpp:163
genesis::sequence::SignatureSpecifications
Specifications for calculating signatures (like k-mer counts) from Sequences.
Definition: signature_specifications.hpp:64
genesis::sequence::SignatureSpecifications::UnknownCharBehavior::kSkip
@ kSkip
Simply ignore the char by skipping it.
genesis::sequence::SignatureSpecifications::kmer_combined_reverse_complement_map
std::vector< size_t > const & kmer_combined_reverse_complement_map() const
Get a map from indices of kmer_list() and signature_counts() vectors to a smaller list which combines...
Definition: signature_specifications.cpp:122