A toolkit for working with phylogenetic data.
v0.24.0
utils/tools/geodesy/functions.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 
36 #include <algorithm>
37 #include <array>
38 #include <cassert>
39 #include <cctype>
40 #include <cmath>
41 #include <iostream>
42 #include <ostream>
43 #include <regex>
44 #include <stdexcept>
45 #include <utility>
46 
47 namespace genesis {
48 namespace utils {
49 
50 // =================================================================================================
51 // Coordinate Conversion
52 // =================================================================================================
53 
58 {
59  kLatitude,
61 };
62 
71  std::string const& h1, // Hemisphere
72  std::string const& d, // Degrees
73  std::string const& m, // Minutes
74  std::string const& s, // Seconds
75  std::string const& h2, // Hemispheres
76  GeoCoordinateComponent component
77 ) {
78 
79  // Depending on the component, store which chars determine pos and neg sign,
80  // and the valid range for the hemisphere degrees.
81  assert(
82  component == GeoCoordinateComponent::kLatitude ||
84  );
85  char const pos_h = ( component == GeoCoordinateComponent::kLatitude ? 'n' : 'e' );
86  char const neg_h = ( component == GeoCoordinateComponent::kLatitude ? 's' : 'w' );
87  double const d_min = ( component == GeoCoordinateComponent::kLatitude ? -90.0 : -180.0 );
88  double const d_max = ( component == GeoCoordinateComponent::kLatitude ? +90.0 : +180.0 );
89 
90  // Get hemisphere string.
91  std::string h;
92  if( ! h1.empty() && ! h2.empty() ) {
93  throw std::invalid_argument( "Invalid coordinate: Has two hemisphere directives [NESW]." );
94  } else if( ! h1.empty() ) {
95  h = h1;
96  } else if( ! h2.empty() ) {
97  h = h2;
98  }
99 
100  // By design of the regex, the hemisphere string can only be 0 or 1 char long.
101  assert( h.size() < 2 );
102 
103  // Get hemisphere sign.
104  int sgn = 1;
105  if( h.size() == 1 ) {
106  if( tolower( h[0] ) == neg_h ) {
107  sgn = -1;
108  } else if( tolower( h[0] ) != pos_h ) {
109  if( component == GeoCoordinateComponent::kLatitude ) {
110  throw std::invalid_argument(
111  "Invalid coordinate: Latitude hemisphere directive not in [NS]."
112  );
113  } else {
114  throw std::invalid_argument(
115  "Invalid coordinate: Longitude hemisphere directive not in [EW]."
116  );
117  }
118  }
119  }
120 
121  // Calc degrees.
122  if( d.empty() ) {
123  throw std::invalid_argument( "Invalid coordinate: No degrees." );
124  }
125  double const dd = stod(d);
126  if( dd < d_min || dd > d_max ) {
127  throw std::invalid_argument(
128  "Invalid coordinate: Degrees outside of range [ " + std::to_string( d_min ) + ", " +
129  std::to_string( d_max ) + " ]."
130  );
131  }
132 
133  // If a hemisphere was explicitly set, and a sign for the degrees was explicitly set,
134  // we need to make sure that they match, and avoid double negatives.
135  if( h.size() == 1 && ( d[0] == '+' || d[0] == '-' )) {
136  if( ( sgn < 0 && dd > 0.0 ) || ( sgn > 0 && dd < 0.0 ) ) {
137  throw std::invalid_argument(
138  "Invalid coordinate: Hemisphere does not match sign of the degrees."
139  );
140  }
141  if( sgn < 0 && dd < 0.0 ) {
142  sgn = 1;
143  }
144  }
145 
146  // Calc minutes.
147  double md = 0.0;
148  if( ! m.empty() ) {
149  md = stod(m) ;
150 
151  if( std::count( d.begin(), d.end(), '.') == 1 ) {
152  throw std::invalid_argument(
153  "Invalid coordinate: Degrees cannot have decimal part if Minutes are provided."
154  );
155  }
156  }
157  if( md < 0.0 || md > 60.0 ) {
158  throw std::invalid_argument(
159  "Invalid coordinate: Minutes outside of range [ 0.0, 60.0 ]."
160  );
161  }
162 
163  // Calc seconds.
164  double sd = 0.0;
165  if( ! s.empty() ) {
166  sd = stod(s) ;
167 
168  if( std::count( m.begin(), m.end(), '.') == 1 ) {
169  throw std::invalid_argument(
170  "Invalid coordinate: Minutes cannot have decimal part if Seconds are provided."
171  );
172  }
173  }
174  if( sd < 0.0 || sd > 60.0 ) {
175  throw std::invalid_argument(
176  "Invalid coordinate: Seconds outside of range [ 0.0, 60.0 ]."
177  );
178  }
179 
180  // Calculate the final value. We need to take care of adding up negative degrees correctly.
181  double v = 0.0;
182  if( dd < 0.0 ) {
183  v = static_cast<double>( sgn ) * -1.0 * ( ( -1.0 * dd ) + md / 60.0 + sd / 3600.0 );
184  } else {
185  v = static_cast<double>( sgn ) * ( dd + md / 60.0 + sd / 3600.0 );
186  }
187 
188  // Finally, we need to check again, as the degrees were added up now.
189  if( v < d_min || v > d_max ) {
190  throw std::invalid_argument(
191  "Invalid coordinate: Degrees outside of range [ " + std::to_string( d_min ) + ", " +
192  std::to_string( d_max ) + " ]."
193  );
194  }
195  return v;
196 }
197 
198 std::string sanitize_geo_coordinate( std::string const& coordinates, bool two_components )
199 {
200  // So far, this is the only use case of multi byte characters that we have.
201  // Would be way too much effort to introduce proper encoding here,
202  // so instead we simply treat each multi byte char as just that,
203  // a series of bytes, and replace them by single byte ascii characters as needed.
204 
205  // Lookup list of what we want to replace.
206  // Converted using "UTF-8 code units" from https://r12a.github.io/apps/conversion/
207  // The order of the list is important. E.g., we have to replace two primes before one prime!
208  std::vector<std::pair<std::string, std::string>> list = {
209  { "\xC2\xB0", "d" }, // Degree Symbol
210  { "\xE2\x80\xB2\xE2\x80\xB2", "s" }, // Two Primes = Seconds
211  { "\xE2\x80\xB2", "m" }, // Prime = Minute
212  { "\xE2\x80\xB3", "s" }, // Double Prime = Seconds
213  };
214 
215  // Replace all bad (multi byte) chars by their ascii counterparts.
216  auto res = coordinates;
217  for( auto const& bad : list ) {
218  size_t pos = 0;
219  while( pos < res.size() ) {
220 
221  // Check if we find the bad chars...
222  pos = res.find( bad.first, pos );
223  if( pos == std::string::npos ) {
224  break;
225  }
226 
227  // ...and replace them.
228  res = res.replace( pos, bad.first.size(), bad.second );
229  }
230  }
231 
232  // If there is only one comma, replace it with a slash, so that there is no confusion
233  // between comma as decimal separator and comma as separator between two component of the coord.
234  if( two_components && std::count( res.begin(), res.end(), ',') == 1 ) {
235  auto const pos = res.find( ',' );
236  assert( pos < res.size() );
237  res[ pos ] = '/';
238  }
239 
240  // Now replace all commas with dots, so that parsing to double works.
241  for( size_t i = 0; i < res.size(); ++i ) {
242  if( res[i] == ',' ) {
243  res[i] = '.';
244  }
245  }
246 
247  return res;
248 }
249 
250 GeoCoordinate convert_geo_coordinate( std::string const& latitude, std::string const& longitude )
251 {
252  return convert_geo_coordinate( latitude + " / " + longitude );
253 }
254 
255 GeoCoordinate convert_geo_coordinate( std::string const& coordinate )
256 {
257  // ISO Format, which we probably also should recognize, but currently don't:
258  // https://www.w3.org/2005/Incubator/geo/Wiki/LatitudeLongitudeAltitude
259 
260  // Prepare static regex (no need to re-compile it on every function call).
261  // Alternative regex: https://gist.github.com/moole/3707127
262  static std::string const single_expr =
263  "([NESW])?[\\s]*" // Group 1: Hemisphere
264  "((?:[\\+-]?[0-9]*[\\.][0-9]+)|(?:[\\+-]?[0-9]+))" // Group 2: Degrees
265  "(?:" // Optional A: All or nothing
266  "(?:(?:[\\s]*[d:][\\s]*)|[\\s]+)" // Degree Delimiter
267  "(?:" // Optional Minutes
268  "((?:[0-9]*[\\.][0-9]+)|(?:[0-9]+))" // Group 3: Minutes
269  "(?:" // Optional B: All or nothing
270  "(?:(?:[\\s]*['m:][\\s]*)|[\\s]+)" // Minutes Delimiter
271  "(?:" // Optional Seconds
272  "((?:[0-9]*[\\.][0-9]+)|(?:[0-9]+))" // Group 4: Seconds
273  "(?:(?:[\\s]*(?:[\"s:]|(?:''))[\\s]*)|[\\s]*)" // Seconds Delimiter
274  ")?" // Optional Seconds
275  ")?" // Optional B
276  ")?" // Optional Minutes
277  ")?" // Optional A
278  "[\\s]*([NESW])?" // Group 5: Hemisphere
279  ;
280  static std::string const double_expr =
281  "^[\\s]*" + single_expr + "(?:(?:[\\s]*[/][\\s]*)|[\\s]+)" + single_expr + "[\\s]*$"
282  ;
283  static std::regex pattern( double_expr );
284 
285  // Run the expression.
286  std::smatch matches;
287  auto const sanitized = sanitize_geo_coordinate( coordinate );
288  if( ! std::regex_search( sanitized, matches, pattern )) {
289  throw std::invalid_argument( "Invalid coordinate format." );
290  }
291 
292  // Print for debug.
293  // for (size_t i = 0; i < matches.size(); ++i) {
294  // std::cout << " " << i << ": '" << matches[i].str() << "'\n";
295  // }
296 
297  // Calculate component values.
298  double const lat = convert_single_geo_coordinate_(
299  matches[1].str(), matches[2].str(), matches[3].str(), matches[4].str(), matches[5].str(),
301  );
302  double const lon = convert_single_geo_coordinate_(
303  matches[6].str(), matches[7].str(), matches[8].str(), matches[9].str(), matches[10].str(),
305  );
306  assert( - 90.0 <= lat && lat <= 90.0 );
307  assert( -180.0 <= lon && lon <= 180.0 );
308 
309  // Make and return result.
310  return { lat, lon };
311 }
312 
313 // =================================================================================================
314 // Distance
315 // =================================================================================================
316 
317 double geo_distance( GeoCoordinate const& c1, GeoCoordinate const& c2 )
318 {
319  // Function adapted from https://rosettacode.org/wiki/Haversine_formula#C
320 
321  // Convert to rad, for trigonometry.
322  auto to_rad = []( double d ){
323  return PI * d / 180.0;
324  };
325 
326  // Get proper angles.
327  auto const th1 = to_rad( c1.latitude() );
328  auto const ph1 = to_rad( c1.longitude() );
329  auto const th2 = to_rad( c2.latitude() );
330  auto const ph2 = to_rad( c2.longitude() );
331 
332  // Get parts of the formula.
333  auto const dx = cos( ph1 - ph2 ) * cos( th1 ) - cos( th2 );
334  auto const dy = sin( ph1 - ph2 ) * cos( th1 );
335  auto const dz = sin( th1 ) - sin( th2 );
336 
337  return asin( sqrt( dx * dx + dy * dy + dz * dz ) / 2.0 ) * 2.0 * EARTH_MEAN_RADIUS;
338 }
339 
340 // =================================================================================================
341 // Printing
342 // =================================================================================================
343 
344 std::ostream& operator<< ( std::ostream& os, GeoCoordinate const& coord )
345 {
346  os << "( " << coord.latitude() << ", " << coord.longitude() << " )";
347  return os;
348 }
349 
350 } // namespace utils
351 } // namespace genesis
std::string sanitize_geo_coordinate(std::string const &coordinates, bool two_components)
Replace non-ascii symbols used in geographic coordinates by their ascii equivalents.
static double convert_single_geo_coordinate_(std::string const &h1, std::string const &d, std::string const &m, std::string const &s, std::string const &h2, GeoCoordinateComponent component)
Local helper function that takes parts of the regex matches and converts them to double.
Geographical coordinates in degrees.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::ostream & operator<<(std::ostream &os, const Matrix< signed char > &matrix)
Template specialization for signed char, in order to print nicely.
double geo_distance(GeoCoordinate const &c1, GeoCoordinate const &c2)
Calculate the distance (in km) between two points on Earth.
constexpr double PI
Make the world go round.
Definition: common.hpp:54
GeoCoordinateComponent
Local helper enum that indicates which component of a coordinate we are dealing with.
double latitude() const
Latitude, in range [ -90.0, 90.0 ].
GeoCoordinate convert_geo_coordinate(std::string const &latitude, std::string const &longitude)
Parse strings of geographic coordinates.
Provides some commonly used string utility functions.
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
constexpr double EARTH_MEAN_RADIUS
Earth is not flat!
double longitude() const
Longitude, in range [ -180.0, 180.0 ].