A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fs.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <cctype>
34 #include <dirent.h>
35 #include <errno.h>
36 #include <fstream>
37 #include <functional>
38 #include <regex>
39 #include <sstream>
40 #include <stdexcept>
41 #include <streambuf>
42 #include <sys/stat.h>
43 
47 
48 namespace genesis {
49 namespace utils {
50 
51 // =================================================================================================
52 // File Access
53 // =================================================================================================
54 
61 bool is_file( std::string const& path )
62 {
63  return file_exists( path );
64 }
65 
/**
 * @brief Return true iff the file exists.
 *
 * A file is considered to exist if it can be opened for reading and is not a directory.
 *
 * @param filename Path of the file to test.
 * @return true if @p filename can be opened as an input stream and does not name a directory,
 *         false otherwise.
 */
bool file_exists( std::string const& filename )
{
    // On POSIX systems, a directory can be opened for reading, so the stream check alone
    // would report directories as existing files. Rule them out explicitly first.
    struct stat info;
    if( stat( filename.c_str(), &info ) == 0 && S_ISDIR( info.st_mode )) {
        return false;
    }

    std::ifstream infile( filename );
    return infile.good();
}
74 
/**
 * @brief Return the contents of a file as a string.
 *
 * The file is opened in text mode and read until the end of the stream.
 *
 * @param filename Path of the file to read.
 * @return The complete contents of the file.
 * @throws std::runtime_error if the file cannot be opened for reading.
 */
std::string file_read( std::string const& filename )
{
    std::ifstream infile( filename );
    if( !infile.good() ) {
        throw std::runtime_error( "Cannot read from file '" + filename + "'." );
    }

    std::string str;

    // Use the file size as a capacity hint only. tellg() returns -1 if the position cannot be
    // determined (e.g., non-seekable input); converting that to an unsigned size would request
    // an absurd capacity and make reserve() throw.
    infile.seekg( 0, std::ios::end );
    auto const size = static_cast<std::streamoff>( infile.tellg() );
    if( infile.good() && size > 0 && static_cast<std::size_t>( size ) <= str.max_size() ) {
        str.reserve( static_cast<std::size_t>( size ));
    }

    // Reset any error flags from a failed size probe before rewinding to the start.
    infile.clear();
    infile.seekg( 0, std::ios::beg );

    str.assign(
        std::istreambuf_iterator<char>( infile ),
        std::istreambuf_iterator<char>()
    );
    return str;
}
97 
106 void file_write( std::string const& content, std::string const& filename )
107 {
108  // TODO check if path exists, create if not (make a function for that)
109 
110  std::ofstream ofs;
111  utils::file_output_stream( filename, ofs );
112  ofs << content;
113 }
114 
/**
 * @brief Append the content of a string to a file.
 *
 * The file is opened in append mode, so existing content is kept and @p content is added
 * at the end.
 *
 * @param content  Text to append.
 * @param filename Path of the file to append to.
 * @throws std::runtime_error if the file cannot be opened for appending.
 */
void file_append( std::string const& content, std::string const& filename )
{
    // TODO check if path exists, create if not (make a function for that)
    // TODO maybe merge with file_write and use mode as optional parameter.

    std::ofstream appender( filename, std::ofstream::app );
    if( !appender ) {
        throw std::runtime_error( "Cannot append to file '" + filename + "'." );
    }
    appender << content;
}
131 
132 // =================================================================================================
133 // Directory Access
134 // =================================================================================================
135 
142 bool is_dir( std::string const& path )
143 {
144  return dir_exists( path );
145 }
146 
/**
 * @brief Return true iff the directory exists.
 *
 * @param dir Path to test.
 * @return true if @p dir can be stat'ed and its file type is "directory", false otherwise
 *         (including when stat() fails for any reason).
 */
bool dir_exists( std::string const& dir )
{
    struct stat info;
    if( stat( dir.c_str(), &info ) != 0 ) {
        return false;
    }

    // Use the type-test macro rather than a plain bit test against S_IFDIR: the file type is
    // a multi-bit field, and other types (e.g., block devices) share bits with S_IFDIR.
    return S_ISDIR( info.st_mode );
}
167 
/**
 * @brief Create a directory.
 *
 * If the path already exists and is a directory, nothing is done. Otherwise, the directory is
 * created via mkdir() with a requested mode of 0775. An EEXIST failure of mkdir() is not
 * treated as an error.
 *
 * @param path Path of the directory to create.
 * @throws std::runtime_error if the path exists but is not a directory, or if the directory
 *         cannot be created.
 */
void dir_create( std::string const& path )
{
    struct stat status;
    bool const path_exists = ( stat( path.c_str(), &status ) == 0 );

    // Existing path: fine if it already is a directory, an error otherwise.
    if( path_exists ) {
        if( !S_ISDIR( status.st_mode )) {
            throw std::runtime_error("Path exists, but is not a directory: " + path);
        }
        return;
    }

    // Nothing there yet (or stat failed): try to create it.
    mode_t const permissions = 0775;
    int const rc = mkdir( path.c_str(), permissions );
    if( rc != 0 && errno != EEXIST ) {
        throw std::runtime_error("Cannot create directory: " + path);
    }
}
188 
192 std::string dir_normalize_path( std::string const& path )
193 {
194  return utils::trim_right( path, "/") + "/";
195 }
196 
197 std::vector<std::string> dir_list_contents_(
198  std::string const& dir,
199  bool full_path,
200  std::string const& regex,
201  std::function<bool( std::string const& )> condition
202 ) {
203  std::vector<std::string> list;
204  auto const dir_path = dir_normalize_path( dir );
205  std::regex pattern( regex );
206 
207  DIR* dp;
208  struct dirent* dirp;
209 
210  if( ( dp = opendir( dir.c_str() )) == nullptr) {
211  throw std::runtime_error( "Cannot open directory '" + dir + "'." );
212  }
213  while ((dirp = readdir(dp)) != nullptr) {
214  auto const fn = std::string( dirp->d_name );
215 
216  if (fn == "." || fn == "..") {
217  continue;
218  }
219  if( ! regex.empty() && ! regex_match( fn, pattern ) ) {
220  continue;
221  }
222  if( ! condition( dir_path + fn ) ) {
223  continue;
224  }
225 
226  if( full_path ) {
227  list.push_back( dir_path + fn );
228  } else {
229  list.push_back( fn );
230  }
231  }
232  closedir(dp);
233 
234  //~ std::sort(list.begin(), list.end());
235  return list;
236 }
237 
238 std::vector<std::string> dir_list_contents(
239  std::string const& dir,
240  bool full_path,
241  std::string const& regex
242 ) {
243  return dir_list_contents_(
244  dir, full_path, regex,
245  []( std::string const& ){ return true; }
246  );
247 }
248 
249 std::vector<std::string> dir_list_files(
250  std::string const& dir,
251  bool full_path,
252  std::string const& regex
253 ) {
254  return dir_list_contents_(
255  dir, full_path, regex, is_file
256  );
257 }
258 
259 std::vector<std::string> dir_list_directories(
260  std::string const& dir,
261  bool full_path,
262  std::string const& regex
263 ) {
264  return dir_list_contents_(
265  dir, full_path, regex, is_dir
266  );
267 }
268 
269 // =================================================================================================
270 // File Information
271 // =================================================================================================
272 
276 std::unordered_map<std::string, std::string> file_info( std::string const& filename )
277 {
278  std::string basename = file_basename(filename);
279  std::unordered_map<std::string, std::string> res;
280 
281  res["path"] = file_path(filename);
282  res["basename"] = basename;
283  res["filename"] = file_filename(basename);
284  res["extension"] = file_extension(basename);
285 
286  return res;
287 }
288 
/**
 * @brief Return the size of a file.
 *
 * The file is opened in binary mode and positioned at its end; the stream position there is
 * returned as the size in bytes.
 *
 * @param filename Path of the file to measure.
 * @return Size of the file in bytes.
 * @throws std::runtime_error if the file cannot be opened or its size cannot be determined.
 */
size_t file_size( std::string const& filename )
{
    std::ifstream in( filename, std::ifstream::ate | std::ifstream::binary );
    if( !in.good() ) {
        throw std::runtime_error( "Cannot read from file '" + filename + "'." );
    }

    // tellg() reports failure as -1; do not let that wrap around into a huge unsigned size.
    auto const end_pos = static_cast<std::streamoff>( in.tellg() );
    if( end_pos < 0 ) {
        throw std::runtime_error( "Cannot determine size of file '" + filename + "'." );
    }
    return static_cast<size_t>( end_pos );
}
298 
/**
 * @brief Return the path leading to a file.
 *
 * Everything from the last path separator (slash or backslash) onwards is cut off.
 * If the name contains no separator, it is returned unchanged.
 *
 * @param filename File name, possibly including a directory part.
 * @return The part of @p filename before its last separator, or @p filename itself if there
 *         is no separator.
 */
std::string file_path( std::string const& filename )
{
    auto const separator = filename.find_last_of( "\\/" );
    if( separator == std::string::npos ) {
        return filename;
    }
    return filename.substr( 0, separator );
}
314 
/**
 * @brief Remove directory name from file name if present.
 *
 * Everything up to and including the last path separator (slash or backslash) is cut off.
 * If the name contains no separator, it is returned unchanged.
 *
 * @param filename File name, possibly including a directory part.
 * @return The part of @p filename after its last separator, or @p filename itself if there
 *         is no separator.
 */
std::string file_basename( std::string const& filename )
{
    auto const separator = filename.find_last_of( "\\/" );
    if( separator == std::string::npos ) {
        return filename;
    }
    return filename.substr( separator + 1 );
}
328 
/**
 * @brief Remove extension if present.
 *
 * The name is cut at its last dot. A name without a dot, or one whose only dot is the very
 * first character, is returned unchanged.
 *
 * @param filename File name to strip the extension from.
 * @return @p filename without its last dot and everything after it, or @p filename itself in
 *         the cases described above.
 */
std::string file_filename( std::string const& filename )
{
    auto const dot = filename.rfind( '.' );
    bool const has_extension = ( dot != std::string::npos && dot != 0 );
    return has_extension ? filename.substr( 0, dot ) : filename;
}
345 
/**
 * @brief Return the extension name of a file.
 *
 * The extension is the part after the last dot. A name without a dot, or one whose only dot
 * is the very first character, is returned unchanged.
 *
 * @param filename File name to extract the extension from.
 * @return The part of @p filename after its last dot, or @p filename itself in the cases
 *         described above.
 */
std::string file_extension( std::string const& filename )
{
    auto const dot = filename.rfind( '.' );
    bool const has_extension = ( dot != std::string::npos && dot != 0 );
    return has_extension ? filename.substr( dot + 1 ) : filename;
}
361 
362 // =================================================================================================
363 // File Names
364 // =================================================================================================
365 
386 bool is_valid_filname( std::string const& filename )
387 {
388  // No empty filenames.
389  if( trim( filename ) == "" ) {
390  return false;
391  }
392 
393  // No space at beginning or end.
394  if( starts_with( filename, " " ) || ends_with( filename, " " )) {
395  return false;
396  }
397 
398  // Check forbidden chars of Win and Unix systems.
399  if( filename.find_first_of( "<>:\"\\/|?*" ) != std::string::npos ) {
400  return false;
401  }
402 
403  // Check for non-printable chars.
404  // They might be allowed on most systems, but better be conservative here.
405  for( auto c : filename ) {
406  if( ! isprint( c ) ) {
407  return false;
408  }
409  }
410 
411  return true;
412 }
413 
435 std::string sanitize_filname( std::string const& filename )
436 {
437  // Prepare result.
438  std::string result = "";
439  result.reserve( filename.size() );
440 
441  // Copy all printable chars, drop the others.
442  for( auto c : filename ) {
443  if( isprint( c ) ) {
444  result += c;
445  }
446  }
447 
448  // No spaces around the name, and replace all forbidden chars by underscores.
449  result = trim( result );
450  result = replace_all_chars( result, "<>:\"\\/|?*", '_' );
451 
452  if( result == "" ) {
453  throw std::runtime_error( "Invalid filename." );
454  }
455 
456  return result;
457 }
458 
459 } // namespace utils
460 } // namespace genesis
bool is_file(std::string const &path)
Return true iff the provided path is a file.
Definition: fs.cpp:61
utils::Range< IteratorPath< TreeLink const, TreeNode const, TreeEdge const > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:325
Provides some valuable algorithms that are not part of the C++ 11 STL.
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:61
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:238
void file_output_stream(std::string const &filename, std::ofstream &out_stream, std::ios_base::openmode mode=std::ios_base::out)
Helper function to obtain an output stream to a file.
std::string dir_normalize_path(std::string const &path)
Normalize a dir name, i.e., make sure that the given path ends with exactly one slash.
Definition: fs.cpp:192
std::string file_filename(std::string const &filename)
Remove extension if present.
Definition: fs.cpp:335
bool file_exists(std::string const &filename)
Return true iff the file exists.
Definition: fs.cpp:69
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces.
Definition: string.cpp:214
bool dir_exists(std::string const &dir)
Return true iff the directory exists.
Definition: fs.cpp:150
std::vector< std::string > dir_list_contents(std::string const &dir, bool full_path, std::string const &regex)
Get a list of files and directories in a directory.
Definition: fs.cpp:238
std::vector< std::string > dir_list_directories(std::string const &dir, bool full_path, std::string const &regex)
Get a list of directories in a directory.
Definition: fs.cpp:259
std::unordered_map< std::string, std::string > file_info(std::string const &filename)
Return information about a file.
Definition: fs.cpp:276
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:200
std::vector< std::string > dir_list_files(std::string const &dir, bool full_path, std::string const &regex)
Get a list of files in a directory.
Definition: fs.cpp:249
bool is_valid_filname(std::string const &filename)
Check whether a file name is valid.
Definition: fs.cpp:386
void dir_create(std::string const &path)
Create a directory.
Definition: fs.cpp:175
size_t file_size(std::string const &filename)
Return the size of a file.
Definition: fs.cpp:292
std::string file_path(std::string const &filename)
Return the path leading to a file.
Definition: fs.cpp:304
std::string sanitize_filname(std::string const &filename)
Remove or replace all invalid parts of a filename.
Definition: fs.cpp:435
std::vector< std::string > dir_list_contents_(std::string const &dir, bool full_path, std::string const &regex, std::function< bool(std::string const &)> condition)
Definition: fs.cpp:197
std::string file_basename(std::string const &filename)
Remove directory name from file name if present.
Definition: fs.cpp:318
Provides some commonly used string utility functions.
Provides functions for accessing the file system.
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:69
std::string file_read(std::string const &filename)
Return the contents of a file as a string.
Definition: fs.cpp:80
bool is_dir(std::string const &path)
Return true iff the provided path is a directory.
Definition: fs.cpp:142
void file_append(std::string const &content, std::string const &filename)
Append the content of a string to a file.
Definition: fs.cpp:120
std::string file_extension(std::string const &filename)
Return the extension name of a file.
Definition: fs.cpp:351
void file_write(std::string const &content, std::string const &filename)
Write the content of a string to a file.
Definition: fs.cpp:106