A library for working with phylogenetic data.
v0.25.0
vcf_common.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FORMATS_VCF_COMMON_H_
2 #define GENESIS_POPULATION_FORMATS_VCF_COMMON_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #ifdef GENESIS_HTSLIB
35 
36 #include <cstdint>
37 #include <string>
38 #include <vector>
39 
40 extern "C" {
41  // #include <htslib/vcf.h>
42  // #include <htslib/hts.h>
43 }
44 
45 namespace genesis {
46 namespace population {
47 
48 // =================================================================================================
49 // Typedefs and Enums
50 // =================================================================================================
51 
// Specification for the values determining header line types of VCF/BCF files.
//
// Each enumerator has an explicit int value (0-5).
// NOTE(review): these values presumably mirror htslib's BCF_HL_* constants, which is what
// vcf_hl_type_to_string() takes as input — confirm against htslib/vcf.h before changing any value.
60 enum class VcfHeaderLine : int
61 {
62  kFilter = 0,
63  kInfo = 1,
64  kFormat = 2,
65  kContig = 3,
66  kStructured = 4, // structured header line TAG=<A=..,B=..>
67  kGeneric = 5 // generic header line
68 };
69 
// Specification for the data type of the values expected in key-value-pairs of VCF/BCF files.
//
// Each enumerator has an explicit int value (0-3); vcf_value_type_to_string() accepts either
// the enum or a plain int.
// NOTE(review): the values presumably mirror htslib's BCF_HT_* constants — confirm against
// htslib/vcf.h before changing any value.
78 enum class VcfValueType : int
79 {
80  kFlag = 0,
81  kInteger = 1,
82  kFloat = 2,
83  kString = 3
84 };
85 
// Specification for special markers for the number of values expected for key-value-pairs
// of VCF/BCF files.
//
// NOTE(review): the values 0-4 presumably mirror htslib's BCF_VL_* constants — confirm against
// htslib/vcf.h before changing any value.
95 enum class VcfValueSpecial : int
96 {
 // Fixed number of values expected. In VCF, this is denoted simply by an integer number.
106  kFixed = 0,
107 
 // Variable number of possible values, or unknown, or unbounded.
112  kVariable = 1,
113 
 // NOTE(review): presumably one value per alternate allele (VCF "Number=A") — confirm.
117  kAllele = 2,
118 
 // NOTE(review): presumably one value per possible genotype (VCF "Number=G") — confirm.
123  kGenotype = 3,
124 
 // NOTE(review): presumably one value per allele including the reference (VCF "Number=R") — confirm.
129  kReference = 4,
130 };
131 
// Collect the four required keys that describe an INFO or FORMAT sub-field of VCF/BCF files.
//
// Plain aggregate with public data members and no invariants of its own.
// NOTE(review): the count of values is apparently split across two members, `special`
// (which kind of count) and `number` (the integer count) — confirm how `number` is meant
// to be read when `special` is not kFixed.
140 struct VcfSpecification
141 {
142  std::string id;
143  VcfValueType type;
144  VcfValueSpecial special;
145  int number;
146  std::string description;
147 };
148 
149 // =================================================================================================
150 // Typedef and Enum Helpers
151 // =================================================================================================
152 
// Conversion helpers that return a std::string for a VcfValueType or VcfValueSpecial value.
// Each comes in two overloads: one taking the enum class, one taking a plain int.
// Only the declarations live here; the definitions are in vcf_common.cpp.
// NOTE(review): the int overloads presumably accept the raw htslib constants and behave like
// the enum overloads — confirm in vcf_common.cpp, including what happens for out-of-range ints.
153 std::string vcf_value_type_to_string( VcfValueType ht_type );
154 std::string vcf_value_type_to_string( int ht_type );
155 std::string vcf_value_special_to_string( VcfValueSpecial vl_type_num );
156 std::string vcf_value_special_to_string( int vl_type_num );
157 
// Internal helper function to convert htslib-internal BCF_HL_* header line type values
// (passed as a plain int) to a std::string. Defined in vcf_common.cpp.
// NOTE(review): presumably the accepted values correspond to the VcfHeaderLine enumerators — confirm.
162 std::string vcf_hl_type_to_string( int hl_type );
163 
164 // =================================================================================================
165 // VCF Genotype Functions
166 // =================================================================================================
167 
168 // Forward declare.
169 class VcfGenotype;
170 
// Return the VCF-like string representation of a set of VcfGenotype entries.
// Takes the genotypes by const reference and returns a new std::string; defined in vcf_common.cpp.
178 std::string vcf_genotype_string( std::vector<VcfGenotype> const& genotypes );
179 
// Return the sum of genotypes for a set of VcfGenotype entries.
// Takes the genotypes by const reference and returns the sum as size_t; defined in vcf_common.cpp.
// NOTE(review): which per-genotype quantity is summed, and how missing calls are treated,
// is not visible from this header — check the definition before relying on it.
189 size_t vcf_genotype_sum( std::vector<VcfGenotype> const& genotypes );
190 
191 // =================================================================================================
192 // VCF Genotype
193 // =================================================================================================
194 
// Simple wrapper class for one genotype field for a sample.
//
// Stores the int32_t value handed to the constructor and exposes read-only accessors
// that interpret it. All special members are defaulted, so instances are freely copyable
// and movable. The accessors are only declared here; they are defined in vcf_common.cpp.
218 class VcfGenotype
219 {
220 public:
221 
222  // -------------------------------------------------------------------------
223  // Constructors and Rule of Five
224  // -------------------------------------------------------------------------
225 
 // Wrap the given raw genotype value. Explicit, so a plain int32_t never converts
 // to a VcfGenotype implicitly.
226  explicit VcfGenotype( int32_t genotype )
227  : genotype_(genotype)
228  {}
229 
230  ~VcfGenotype() = default;
231 
232  VcfGenotype( VcfGenotype const& ) = default;
233  VcfGenotype( VcfGenotype&& ) = default;
234 
235  VcfGenotype& operator= ( VcfGenotype const& ) = default;
236  VcfGenotype& operator= ( VcfGenotype&& ) = default;
237 
238  // -------------------------------------------------------------------------
239  // Access Functions
240  // -------------------------------------------------------------------------
241 
 // Return the index of the variant set for this genotype call.
248  int32_t variant_index() const;
249 
 // True iff the called variant of this genotype is the REF allele.
255  bool is_reference() const;
256 
 // True iff the called variant of this genotype is not the REF, but one of the ALT alleles.
263  bool is_alternative() const;
264 
 // True iff the variant call is missing for this genotype.
270  bool is_missing() const;
271 
 // True iff the called variant is phased.
282  bool is_phased() const;
283 
 // Return the raw genotype value as used by htslib.
287  int32_t data() const;
288 
289  // -------------------------------------------------------------------------
290  // Data Members
291  // -------------------------------------------------------------------------
292 
293 private:
294 
 // Raw genotype value exactly as passed to the constructor.
295  int32_t genotype_;
296 };
297 
298 } // namespace population
299 } // namespace genesis
300 
301 #endif // htslib guard
302 #endif // include guard
genesis::population::VcfGenotype::is_reference
bool is_reference() const
True iff the called variant of this genotype is the REF allele.
Definition: vcf_common.cpp:249
genesis::population::VcfGenotype
Simple wrapper class for one genotype field for a sample.
Definition: vcf_common.hpp:218
genesis::population::VcfGenotype::is_missing
bool is_missing() const
True iff the variant call is missing for this genotype.
Definition: vcf_common.cpp:259
genesis::population::VcfGenotype::variant_index
int32_t variant_index() const
Return the index of the variant set for this genotype call.
Definition: vcf_common.cpp:244
genesis::population::VcfSpecification::description
std::string description
Definition: vcf_common.hpp:146
genesis::population::VcfGenotype::is_phased
bool is_phased() const
True iff the called variant is phased.
Definition: vcf_common.cpp:264
genesis::population::VcfValueSpecial::kAllele
@ kAllele
genesis::population::VcfValueSpecial::kGenotype
@ kGenotype
genesis::population::vcf_hl_type_to_string
std::string vcf_hl_type_to_string(int hl_type)
Internal helper function to convert htslib-internal BCF_HL_* header line type values to their string ...
Definition: vcf_common.cpp:199
genesis::population::VcfValueType::kString
@ kString
genesis::population::VcfGenotype::data
int32_t data() const
Return the raw genotype value as used by htslib.
Definition: vcf_common.cpp:269
genesis::population::VcfHeaderLine::kFilter
@ kFilter
genesis::population::VcfHeaderLine
VcfHeaderLine
Specification for the values determining header line types of VCF/BCF files.
Definition: vcf_common.hpp:60
genesis::population::VcfSpecification::id
std::string id
Definition: vcf_common.hpp:142
genesis::population::VcfValueSpecial
VcfValueSpecial
Specification for special markers for the number of values expected for key-value-pairs of VCF/BCF fi...
Definition: vcf_common.hpp:95
genesis::population::VcfValueSpecial::kVariable
@ kVariable
Variable number of possible values, or unknown, or unbounded. In VCF, this is denoted by '....
genesis::population::VcfValueType::kFloat
@ kFloat
genesis::population::VcfSpecification
Collect the four required keys that describe an INFO or FORMAT sub-field of VCF/BCF files.
Definition: vcf_common.hpp:140
genesis::population::VcfHeaderLine::kGeneric
@ kGeneric
genesis::population::VcfGenotype::VcfGenotype
VcfGenotype(int32_t genotype)
Definition: vcf_common.hpp:226
genesis::population::VcfHeaderLine::kContig
@ kContig
genesis::population::VcfSpecification::special
VcfValueSpecial special
Definition: vcf_common.hpp:144
genesis::population::VcfValueSpecial::kReference
@ kReference
genesis::population::VcfHeaderLine::kStructured
@ kStructured
genesis::population::VcfGenotype::~VcfGenotype
~VcfGenotype()=default
genesis::population::vcf_genotype_sum
size_t vcf_genotype_sum(std::vector< VcfGenotype > const &genotypes)
Return the sum of genotypes for a set of VcfGenotype entries, typically used to construct a genotype ...
Definition: vcf_common.cpp:230
genesis::population::VcfValueType::kInteger
@ kInteger
genesis::population::VcfValueSpecial::kFixed
@ kFixed
Fixed number of values expected. In VCF, this is denoted simply by an integer number.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VcfGenotype::operator=
VcfGenotype & operator=(VcfGenotype const &)=default
genesis::population::vcf_value_special_to_string
std::string vcf_value_special_to_string(VcfValueSpecial vl_type_num)
Definition: vcf_common.cpp:166
genesis::population::VcfSpecification::type
VcfValueType type
Definition: vcf_common.hpp:143
genesis::population::VcfHeaderLine::kFormat
@ kFormat
genesis::population::vcf_genotype_string
std::string vcf_genotype_string(std::vector< VcfGenotype > const &genotypes)
Return the VCF-like string representation of a set of VcfGenotype entries.
Definition: vcf_common.cpp:216
genesis::population::VcfGenotype::is_alternative
bool is_alternative() const
True iff the called variant of this genotype is not the REF, but one of the ALT alleles.
Definition: vcf_common.cpp:254
genesis::population::VcfValueType
VcfValueType
Specification for the data type of the values expected in key-value-pairs of VCF/BCF files.
Definition: vcf_common.hpp:78
genesis::population::VcfHeaderLine::kInfo
@ kInfo
genesis::population::vcf_value_type_to_string
std::string vcf_value_type_to_string(VcfValueType ht_type)
Definition: vcf_common.cpp:136
genesis::population::VcfSpecification::number
int number
Definition: vcf_common.hpp:145
genesis::population::VcfValueType::kFlag
@ kFlag