41 #include <unordered_map>
62 {
'Y',
"pYrimidine" },
79 {
'B',
"Aspartic acid or Asparagine" },
81 {
'D',
"Aspartic acid" },
82 {
'E',
"Glutamic acid" },
83 {
'F',
"Phenylalanine" },
86 {
'I',
"Isoleucine" },
87 {
'J',
"Leucine or Isoleucine" },
90 {
'M',
"Methionine" },
91 {
'N',
"Asparagine" },
92 {
'O',
"Pyrrolysine" },
98 {
'U',
"Selenocysteine" },
100 {
'W',
"Tryptophan" },
102 {
'Z',
"Glutamic acid or Glutamine" },
105 {
'*',
"translation stop" },
144 {
'C',
"LightMagenta" },
175 {
'A', { 1.0, 0.0, 0.0 } },
176 {
'C', { 0.0, 1.0, 0.0 } },
177 {
'G', { 1.0, 1.0, 0.0 } },
178 {
'T', { 0.0, 0.0, 1.0 } },
179 {
'U', { 0.0, 0.0, 1.0 } },
181 {
'W', { 0.376, 0.376, 0.376 } },
182 {
'S', { 0.376, 0.376, 0.376 } },
183 {
'M', { 0.376, 0.376, 0.376 } },
184 {
'K', { 0.376, 0.376, 0.376 } },
185 {
'R', { 0.376, 0.376, 0.376 } },
186 {
'Y', { 0.376, 0.376, 0.376 } },
188 {
'B', { 0.5, 0.5, 0.5 } },
189 {
'D', { 0.5, 0.5, 0.5 } },
190 {
'H', { 0.5, 0.5, 0.5 } },
191 {
'V', { 0.5, 0.5, 0.5 } },
193 {
'N', { 0.5, 0.5, 0.5 } },
194 {
'O', { 0.5, 0.5, 0.5 } },
195 {
'X', { 0.5, 0.5, 0.5 } },
196 {
'.', { 0.5, 0.5, 0.5 } },
197 {
'-', { 0.5, 0.5, 0.5 } },
198 {
'?', { 0.5, 0.5, 0.5 } }
202 {
'A', { 0.098, 0.500, 1.000 } },
203 {
'B', { 0.376, 0.376, 0.376 } },
204 {
'C', { 0.902, 0.500, 0.500 } },
205 {
'D', { 0.800, 0.302, 0.800 } },
206 {
'E', { 0.800, 0.302, 0.800 } },
207 {
'F', { 0.098, 0.500, 1.000 } },
208 {
'G', { 0.902, 0.600, 0.302 } },
209 {
'H', { 0.098, 0.702, 0.702 } },
210 {
'I', { 0.098, 0.500, 1.000 } },
211 {
'J', { 0.376, 0.376, 0.376 } },
212 {
'K', { 0.902, 0.200, 0.098 } },
213 {
'L', { 0.098, 0.500, 1.000 } },
214 {
'M', { 0.098, 0.500, 1.000 } },
215 {
'N', { 0.098, 0.800, 0.098 } },
216 {
'O', { 0.376, 0.376, 0.376 } },
217 {
'P', { 0.800, 0.800, 0.000 } },
218 {
'Q', { 0.098, 0.800, 0.098 } },
219 {
'R', { 0.902, 0.200, 0.098 } },
220 {
'S', { 0.098, 0.800, 0.098 } },
221 {
'T', { 0.098, 0.800, 0.098 } },
222 {
'U', { 0.376, 0.376, 0.376 } },
223 {
'V', { 0.098, 0.500, 1.000 } },
224 {
'W', { 0.098, 0.500, 1.000 } },
225 {
'Y', { 0.098, 0.702, 0.702 } },
226 {
'Z', { 0.376, 0.376, 0.376 } },
228 {
'X', { 0.5, 0.5, 0.5 } },
229 {
'*', { 0.5, 0.5, 0.5 } },
230 {
'-', { 0.5, 0.5, 0.5 } },
231 {
'?', { 0.5, 0.5, 0.5 } }
335 return "ACDEFGHIKLMNOPQRSTUVWY";
363 std::sort( normalized.begin(), normalized.end() );
364 normalized.erase( std::unique( normalized.begin(), normalized.end() ), normalized.end() );
405 if( accept_degenerated ) {
408 throw std::invalid_argument(
409 "Degenerated nucleic acid code not accepted: " + std::string( 1, code )
423 throw std::invalid_argument(
"Not a nucleic acid code: " + std::string( 1, code ) );
482 if( accept_degenerated ) {
485 throw std::invalid_argument(
486 "Degenerated amino acid code not accepted: " + std::string( 1, code )
497 throw std::invalid_argument(
"Not an amino acid code: " + std::string( 1, code ) );
504 auto result = std::string( sequence.size(),
'-' );
507 auto rev_comp = [](
char c ){
544 throw std::invalid_argument(
"Not a nucleic acid code: " + std::string( 1, c ) );
549 for(
size_t i = 0; i < sequence.size(); ++i ) {
550 char c = sequence[i];
554 if( c ==
'n' || c ==
'N' ) {
555 if( accept_degenerated ) {
556 result[ sequence.size() - i - 1 ] =
'N';
559 throw std::invalid_argument(
560 "Degenerated nucleic acid code not accepted: " + std::string( 1, c )
567 result[ sequence.size() - i - 1 ] = rev_comp( c );
590 throw std::invalid_argument(
591 "No transition base for nucleic acid code: " + std::string( 1, code )
600 auto binary_code_ = [ undetermined_matches_all ](
char c ){
634 if( undetermined_matches_all ) {
645 throw std::invalid_argument(
"Not a nucleic acid code: " + std::string( 1, c ) );
652 auto const ab = binary_code_( an );
653 auto const bb = binary_code_( bn );
655 return ( ab & bb ) > 0;
688 auto ucode = toupper(code);
690 throw std::out_of_range(
"Invalid nucleic acid code '" + std::string( 1, code ) +
"'." );
697 auto ucode = toupper(code);
699 throw std::out_of_range(
"Invalid amino acid code '" + std::string( 1, code ) +
"'." );
706 auto ucode = toupper(code);
708 throw std::out_of_range(
"Invalid nucleic acid code '" + std::string( 1, code ) +
"'." );
717 std::sort( tmp.begin(), tmp.end() );
718 tmp.erase( std::unique( tmp.begin(), tmp.end() ), tmp.end() );
721 throw std::out_of_range(
"Invalid nucleic acid codes '" + codes +
"'." );