A library for working with phylogenetic and population genetic data.
v0.27.0
utils/tools/geodesy/functions.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 
36 #include <algorithm>
37 #include <array>
38 #include <cassert>
39 #include <cctype>
40 #include <cmath>
41 #include <iostream>
42 #include <ostream>
43 #include <regex>
44 #include <stdexcept>
45 #include <utility>
46 
47 namespace genesis {
48 namespace utils {
49 
50 // =================================================================================================
51 // Coordinate Conversion
52 // =================================================================================================
53 
58 {
59  kLatitude,
61 };
62 
71  std::string const& h1, // Hemisphere
72  std::string const& d, // Degrees
73  std::string const& m, // Minutes
74  std::string const& s, // Seconds
75  std::string const& h2, // Hemispheres
76  GeoCoordinateComponent component
77 ) {
78 
79  // Depending on the component, store which chars determine pos and neg sign,
80  // and the valid range for the hemisphere degrees.
81  assert(
82  component == GeoCoordinateComponent::kLatitude ||
84  );
85  char const pos_h = ( component == GeoCoordinateComponent::kLatitude ? 'n' : 'e' );
86  char const neg_h = ( component == GeoCoordinateComponent::kLatitude ? 's' : 'w' );
87  double const d_min = ( component == GeoCoordinateComponent::kLatitude ? -90.0 : -180.0 );
88  double const d_max = ( component == GeoCoordinateComponent::kLatitude ? +90.0 : +180.0 );
89 
90  // Get hemisphere string.
91  std::string h;
92  if( ! h1.empty() && ! h2.empty() ) {
93  throw std::invalid_argument( "Invalid coordinate: Has two hemisphere directives [NESW]." );
94  } else if( ! h1.empty() ) {
95  h = h1;
96  } else if( ! h2.empty() ) {
97  h = h2;
98  }
99 
100  // By design of the regex, the hemisphere string can only be 0 or 1 char long.
101  assert( h.size() < 2 );
102 
103  // Get hemisphere sign.
104  int sgn = 1;
105  if( h.size() == 1 ) {
106  if( tolower( h[0] ) == neg_h ) {
107  sgn = -1;
108  } else if( tolower( h[0] ) != pos_h ) {
109  if( component == GeoCoordinateComponent::kLatitude ) {
110  throw std::invalid_argument(
111  "Invalid coordinate: Latitude hemisphere directive not in [NS]."
112  );
113  } else {
114  throw std::invalid_argument(
115  "Invalid coordinate: Longitude hemisphere directive not in [EW]."
116  );
117  }
118  }
119  }
120 
121  // Calc degrees.
122  if( d.empty() ) {
123  throw std::invalid_argument( "Invalid coordinate: No degrees." );
124  }
125  double const dd = stod(d);
126  if( dd < d_min || dd > d_max ) {
127  throw std::invalid_argument(
128  "Invalid coordinate: Degrees outside of range [ " + std::to_string( d_min ) + ", " +
129  std::to_string( d_max ) + " ]."
130  );
131  }
132 
133  // If a hemisphere was explicitly set, and a sign for the degrees was explicitly set,
134  // we need to make sure that they match, and avoid double negatives.
135  if( h.size() == 1 && ( d[0] == '+' || d[0] == '-' )) {
136  if( ( sgn < 0 && dd > 0.0 ) || ( sgn > 0 && dd < 0.0 ) ) {
137  throw std::invalid_argument(
138  "Invalid coordinate: Hemisphere does not match sign of the degrees."
139  );
140  }
141  if( sgn < 0 && dd < 0.0 ) {
142  sgn = 1;
143  }
144  }
145 
146  // Calc minutes.
147  double md = 0.0;
148  if( ! m.empty() ) {
149  md = stod(m) ;
150 
151  if( std::count( d.begin(), d.end(), '.') == 1 ) {
152  throw std::invalid_argument(
153  "Invalid coordinate: Degrees cannot have decimal part if Minutes are provided."
154  );
155  }
156  }
157  if( md < 0.0 || md > 60.0 ) {
158  throw std::invalid_argument(
159  "Invalid coordinate: Minutes outside of range [ 0.0, 60.0 ]."
160  );
161  }
162 
163  // Calc seconds.
164  double sd = 0.0;
165  if( ! s.empty() ) {
166  sd = stod(s) ;
167 
168  if( std::count( m.begin(), m.end(), '.') == 1 ) {
169  throw std::invalid_argument(
170  "Invalid coordinate: Minutes cannot have decimal part if Seconds are provided."
171  );
172  }
173  }
174  if( sd < 0.0 || sd > 60.0 ) {
175  throw std::invalid_argument(
176  "Invalid coordinate: Seconds outside of range [ 0.0, 60.0 ]."
177  );
178  }
179 
180  // Calculate the final value. We need to take care of adding up negative degrees correctly.
181  double v = 0.0;
182  if( dd < 0.0 ) {
183  v = static_cast<double>( sgn ) * -1.0 * ( ( -1.0 * dd ) + md / 60.0 + sd / 3600.0 );
184  } else {
185  v = static_cast<double>( sgn ) * ( dd + md / 60.0 + sd / 3600.0 );
186  }
187 
188  // Finally, we need to check again, as the degrees were added up now.
189  if( v < d_min || v > d_max ) {
190  throw std::invalid_argument(
191  "Invalid coordinate: Degrees outside of range [ " + std::to_string( d_min ) + ", " +
192  std::to_string( d_max ) + " ]."
193  );
194  }
195  return v;
196 }
197 
198 std::string sanitize_geo_coordinate( std::string const& coordinates, bool two_components )
199 {
200  // So far, this is the only use case of multi byte characters that we have.
201  // Would be way too much effort to introduce proper encoding here,
202  // so instead we simply treat each multi byte char as just that,
203  // a series of bytes, and replace them by single byte ascii characters as needed.
204 
205  // Lookup list of what we want to replace.
206  // Converted using "UTF-8 code units" from https://r12a.github.io/apps/conversion/
207  // The order of the list is important. E.g., we have to replace two primes before one prime!
208  std::vector<std::pair<std::string, std::string>> list = {
209  { "\xC2\xB0", "d" }, // Degree Symbol
210  { "\xE2\x80\xB2\xE2\x80\xB2", "s" }, // Two Primes = Seconds
211  { "\xE2\x80\xB2", "m" }, // Prime = Minute
212  { "\xE2\x80\xB3", "s" }, // Double Prime = Seconds
213  };
214 
215  // Replace all bad (multi byte) chars by their ascii counterparts.
216  auto res = coordinates;
217  for( auto const& bad : list ) {
218  size_t pos = 0;
219  while( pos < res.size() ) {
220 
221  // Check if we find the bad chars...
222  pos = res.find( bad.first, pos );
223  if( pos == std::string::npos ) {
224  break;
225  }
226 
227  // ...and replace them.
228  res = res.replace( pos, bad.first.size(), bad.second );
229  }
230  }
231 
232  // If there is only one comma, replace it with a slash, so that there is no confusion
233  // between comma as decimal separator and comma as separator between two component of the coord.
234  if( two_components && std::count( res.begin(), res.end(), ',') == 1 ) {
235  auto const pos = res.find( ',' );
236  assert( pos < res.size() );
237  res[ pos ] = '/';
238  }
239 
240  // Now replace all commas with dots, so that parsing to double works.
241  for( size_t i = 0; i < res.size(); ++i ) {
242  if( res[i] == ',' ) {
243  res[i] = '.';
244  }
245  }
246 
247  return res;
248 }
249 
250 GeoCoordinate convert_geo_coordinate( std::string const& latitude, std::string const& longitude )
251 {
252  return convert_geo_coordinate( latitude + " / " + longitude );
253 }
254 
255 GeoCoordinate convert_geo_coordinate( std::string const& coordinate )
256 {
257  // ISO Format, which we probably also should recognize, but currently don't:
258  // https://www.w3.org/2005/Incubator/geo/Wiki/LatitudeLongitudeAltitude
259 
260  // Prepare static regex (no need to re-compile it on every function call).
261  // Alternative regex: https://gist.github.com/moole/3707127
262  static std::string const single_expr =
263  "([NESW])?[\\s]*" // Group 1: Hemisphere
264  "((?:[\\+-]?[0-9]*[\\.][0-9]+)|(?:[\\+-]?[0-9]+))" // Group 2: Degrees
265  "(?:" // Optional A: All or nothing
266  "(?:(?:[\\s]*[d:][\\s]*)|[\\s]+)" // Degree Delimiter
267  "(?:" // Optional Minutes
268  "((?:[0-9]*[\\.][0-9]+)|(?:[0-9]+))" // Group 3: Minutes
269  "(?:" // Optional B: All or nothing
270  "(?:(?:[\\s]*['m:][\\s]*)|[\\s]+)" // Minutes Delimiter
271  "(?:" // Optional Seconds
272  "((?:[0-9]*[\\.][0-9]+)|(?:[0-9]+))" // Group 4: Seconds
273  "(?:(?:[\\s]*(?:[\"s:]|(?:''))[\\s]*)|[\\s]*)" // Seconds Delimiter
274  ")?" // Optional Seconds
275  ")?" // Optional B
276  ")?" // Optional Minutes
277  ")?" // Optional A
278  "[\\s]*([NESW])?" // Group 5: Hemisphere
279  ;
280  static std::string const double_expr =
281  "^[\\s]*" + single_expr + "(?:(?:[\\s]*[/][\\s]*)|[\\s]+)" + single_expr + "[\\s]*$"
282  ;
283  static std::regex pattern( double_expr );
284 
285  // Run the expression.
286  std::smatch matches;
287  auto const sanitized = sanitize_geo_coordinate( coordinate );
288  if( ! std::regex_search( sanitized, matches, pattern )) {
289  throw std::invalid_argument( "Invalid coordinate format." );
290  }
291 
292  // Print for debug.
293  // for (size_t i = 0; i < matches.size(); ++i) {
294  // std::cout << " " << i << ": '" << matches[i].str() << "'\n";
295  // }
296 
297  // Calculate component values.
298  double const lat = convert_single_geo_coordinate_(
299  matches[1].str(), matches[2].str(), matches[3].str(), matches[4].str(), matches[5].str(),
301  );
302  double const lon = convert_single_geo_coordinate_(
303  matches[6].str(), matches[7].str(), matches[8].str(), matches[9].str(), matches[10].str(),
305  );
306  assert( - 90.0 <= lat && lat <= 90.0 );
307  assert( -180.0 <= lon && lon <= 180.0 );
308 
309  // Make and return result.
310  return { lat, lon };
311 }
312 
313 // =================================================================================================
314 // Distance
315 // =================================================================================================
316 
317 double geo_distance( GeoCoordinate const& c1, GeoCoordinate const& c2 )
318 {
319  // Function adapted from https://rosettacode.org/wiki/Haversine_formula#C
320 
321  // Convert to rad, for trigonometry.
322  auto to_rad = []( double d ){
323  return PI * d / 180.0;
324  };
325 
326  // Get proper angles.
327  auto const th1 = to_rad( c1.latitude() );
328  auto const ph1 = to_rad( c1.longitude() );
329  auto const th2 = to_rad( c2.latitude() );
330  auto const ph2 = to_rad( c2.longitude() );
331 
332  // Get parts of the formula.
333  auto const dx = cos( ph1 - ph2 ) * cos( th1 ) - cos( th2 );
334  auto const dy = sin( ph1 - ph2 ) * cos( th1 );
335  auto const dz = sin( th1 ) - sin( th2 );
336 
337  return asin( sqrt( dx * dx + dy * dy + dz * dz ) / 2.0 ) * 2.0 * EARTH_MEAN_RADIUS;
338 }
339 
340 // =================================================================================================
341 // Printing
342 // =================================================================================================
343 
344 std::ostream& operator<< ( std::ostream& os, GeoCoordinate const& coord )
345 {
346  os << "( " << coord.latitude() << ", " << coord.longitude() << " )";
347  return os;
348 }
349 
350 } // namespace utils
351 } // namespace genesis
genesis::utils::operator<<
std::ostream & operator<<(std::ostream &os, const Matrix< signed char > &matrix)
Template specialization for signed char, in order to print nicely.
Definition: utils/containers/matrix/operators.cpp:89
genesis::utils::GeoCoordinate
Geographical coordinates in degrees.
Definition: geo_coordinate.hpp:46
functions.hpp
common.hpp
genesis::utils::sanitize_geo_coordinate
std::string sanitize_geo_coordinate(std::string const &coordinates, bool two_components)
Replace non-ascii symbols used in geographic coordinates by their ascii equivalents.
Definition: utils/tools/geodesy/functions.cpp:198
genesis::utils::EARTH_MEAN_RADIUS
constexpr double EARTH_MEAN_RADIUS
Earth is not flat!
Definition: utils/tools/geodesy/functions.hpp:49
genesis::utils::GeoCoordinateComponent::kLongitude
@ kLongitude
genesis::utils::GeoCoordinateComponent::kLatitude
@ kLatitude
genesis::utils::convert_single_geo_coordinate_
static double convert_single_geo_coordinate_(std::string const &h1, std::string const &d, std::string const &m, std::string const &s, std::string const &h2, GeoCoordinateComponent component)
Local helper function that takes parts of the regex matches and converts them to double.
Definition: utils/tools/geodesy/functions.cpp:70
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
string.hpp
Provides some commonly used string utility functions.
genesis::utils::PI
constexpr double PI
Make the world go round.
Definition: common.hpp:55
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::GeoCoordinate::latitude
double latitude() const
Latitude, in range [ -90.0, 90.0 ].
Definition: geo_coordinate.hpp:77
genesis::utils::convert_geo_coordinate
GeoCoordinate convert_geo_coordinate(std::string const &latitude, std::string const &longitude)
Parse strings of geographic coordinates.
Definition: utils/tools/geodesy/functions.cpp:250
genesis::utils::geo_distance
double geo_distance(GeoCoordinate const &c1, GeoCoordinate const &c2)
Calculate the distance (in km) between two points on Earth.
Definition: utils/tools/geodesy/functions.cpp:317
genesis::utils::GeoCoordinate::longitude
double longitude() const
Longitude, in range [ -180.0, 180.0 ].
Definition: geo_coordinate.hpp:85
genesis::utils::GeoCoordinateComponent
GeoCoordinateComponent
Local helper enum that indicates which component of a coordinate we are dealing with.
Definition: utils/tools/geodesy/functions.cpp:57