|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
51 size_t parent_tax_id_pos,
58 if( pos >= line.size() ) {
59 throw std::runtime_error(
60 "NCBI node table line does not contain position " +
std::to_string( pos ) +
61 " for field " + field_name
65 assert( pos < line.size() );
70 for(
auto const& line : node_table ) {
72 node.
tax_id = get_field( line, tax_id_pos,
"tax_id" );
73 node.
parent_tax_id = get_field( line, parent_tax_id_pos,
"parent_tax_id" );
74 node.
rank = get_field( line, rank_pos,
"rank" );
77 if( result.count( node.
tax_id ) > 0 ) {
78 throw std::runtime_error(
"Multiple entries for NCBI node with tax_id " + node.
tax_id );
81 result[ node.
tax_id ] = node;
91 size_t name_class_pos,
92 std::string
const& name_class_filter
98 if( pos >= line.size() ) {
99 throw std::runtime_error(
100 "NCBI name table line does not contain position " +
std::to_string( pos ) +
101 " for field " + field_name
105 assert( pos < line.size() );
110 for(
auto const& line : name_table ) {
112 name.
tax_id = get_field( line, tax_id_pos,
"tax_id" );
113 name.
name = get_field( line, name_pos,
"name" );
114 name.
name_class = get_field( line, name_class_pos,
"name_class" );
122 if( result.count( name.
tax_id ) > 0 ) {
123 throw std::runtime_error(
"Multiple entries for NCBI name with tax_id " + name.
tax_id );
126 result[ name.
tax_id ] = name;
139 std::function<void(
NcbiNode const& node )> add_taxon = [&](
NcbiNode const& node ){
143 if( node.
taxon !=
nullptr ) {
149 throw std::runtime_error(
150 "Cannot find parent tax_id " + node.
parent_tax_id +
" for node " +
151 node.
tax_id +
" in the NCBI nodes."
163 if( parent_node.taxon !=
nullptr ) {
164 parent_tax = parent_node.taxon;
168 if( parent_node.tax_id == node.
tax_id ) {
169 parent_tax = &result;
174 add_taxon( parent_node );
177 assert( parent_node.taxon !=
nullptr );
178 parent_tax = parent_node.taxon;
182 assert( parent_tax );
185 if( names.count( node.
tax_id ) == 0 ) {
186 throw std::runtime_error(
"No name found for tax_id " + node.
tax_id );
188 auto const& name = names.at( node.
tax_id ).name;
193 auto& added = parent_tax->
add_child( name,
false );
202 for(
auto const& node_it : nodes ) {
203 auto const& node = node_it.second;
217 reader.separator_chars(
"|" );
218 reader.trim_chars(
"\t" );
219 reader.quotation_chars(
"" );
std::shared_ptr< BaseInputSource > from_file(std::string const &file_name, bool detect_compression=true)
Obtain an input source for reading from a file.
NcbiNodeLookup convert_ncbi_node_table(utils::CsvReader::Table const &node_table, size_t tax_id_pos, size_t parent_tax_id_pos, size_t rank_pos)
Taxon & add_child(Taxon const &child, bool merge_duplicates=true)
Add a child Taxon as a copy of a given Taxon and return it.
std::string to_string(GenomeLocus const &locus)
std::string const & rank() const
Return the rank of this taxon.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
std::unordered_map< std::string, NcbiName > NcbiNameLookup
Store a Taxonomy, i.e., a nested hierarchy of Taxa.
Taxonomy read_ncbi_taxonomy(std::string const &node_file, std::string const &name_file)
Taxonomy convert_ncbi_tables(NcbiNodeLookup const &nodes, NcbiNameLookup const &names)
std::vector< Field > Line
std::string parent_tax_id
std::vector< Line > Table
Read Comma/Character Separated Values (CSV) data and other delimiter-separated formats.
std::unordered_map< std::string, NcbiNode > NcbiNodeLookup
NcbiNameLookup convert_ncbi_name_table(utils::CsvReader::Table const &name_table, size_t tax_id_pos, size_t name_pos, size_t name_class_pos, std::string const &name_class_filter)