A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fs.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <cctype>
34 #include <dirent.h>
35 #include <errno.h>
36 #include <fstream>
37 #include <functional>
38 #include <regex>
39 #include <sstream>
40 #include <stdexcept>
41 #include <streambuf>
42 #include <sys/stat.h>
43 
47 
48 namespace genesis {
49 namespace utils {
50 
51 // =================================================================================================
52 // File Access
53 // =================================================================================================
54 
/**
 * @brief Return whether `stat()` succeeds for the given path.
 *
 * @param path Path to probe.
 * @return `true` if `stat()` returned 0 for the path, `false` otherwise.
 */
bool path_exists( std::string const& path )
{
    struct stat status;
    int const rc = stat( path.c_str(), &status );
    return rc == 0;
}
60 
61 bool is_file( std::string const& path )
62 {
63  return file_exists( path );
64 }
65 
66 bool file_exists( std::string const& filename )
67 {
68  // There are plenty of discussions on stackoverflow on how to do this correctly,
69  // e.g., https://stackoverflow.com/a/12774387
70  // None of them worked for me, meaning that they also returned true for directories.
71  // Thus, we use a simple approach that does a basic check, and then also tests for dir...
72 
73  std::ifstream infile(filename);
74  infile.seekg( 0, std::ios::end) ;
75  return infile.good() && ! is_dir( filename );
76 }
77 
/**
 * @brief Return the contents of a file as a string.
 *
 * @param filename Path of the file to read.
 * @return Everything that can be read from the file.
 * @throws std::runtime_error if the file cannot be opened for reading.
 */
std::string file_read( std::string const& filename )
{
    std::ifstream infile( filename );
    if( ! infile.good() ) {
        throw std::runtime_error( "Cannot read from file '" + filename + "'." );
    }

    std::string str;

    // Pre-allocate the result if the size can be determined. tellg() yields -1 on failure
    // (e.g., for inputs that cannot be seeked); passing that on to reserve() would request
    // an absurd capacity and throw std::length_error. The reservation is only an optimization,
    // so simply skip it in that case.
    infile.seekg( 0, std::ios::end );
    std::streamoff const length = infile.tellg();
    if( length > 0 ) {
        str.reserve( static_cast<std::size_t>( length ));
    }

    // Reset any error state left behind by a failed seek, then go back to the start.
    infile.clear();
    infile.seekg( 0, std::ios::beg );

    str.assign(
        std::istreambuf_iterator<char>( infile ),
        std::istreambuf_iterator<char>()
    );
    return str;
}
95 
96 void file_write( std::string const& content, std::string const& filename )
97 {
98  // TODO check if path exists, create if not (make a function for that)
99 
100  std::ofstream ofs;
101  utils::file_output_stream( filename, ofs );
102  ofs << content;
103 }
104 
/**
 * @brief Append the content of a string to a file.
 *
 * @param content  Text to append.
 * @param filename Path of the file to append to.
 * @throws std::runtime_error if the file cannot be opened in append mode.
 */
void file_append( std::string const& content, std::string const& filename )
{
    // TODO check if path exists, create if not (make a function for that)
    // TODO maybe merge with file_write and use mode as optional parameter.

    std::ofstream appender( filename, std::ios_base::app );
    if( ! appender ) {
        throw std::runtime_error( "Cannot append to file '" + filename + "'." );
    }
    appender << content;
}
116 
117 // =================================================================================================
118 // Directory Access
119 // =================================================================================================
120 
121 bool is_dir( std::string const& path )
122 {
123  return dir_exists( path );
124 }
125 
/**
 * @brief Return true iff the directory exists.
 *
 * @param dir Path to test.
 * @return `true` if `stat()` succeeds for the path and reports a directory, `false` otherwise.
 */
bool dir_exists( std::string const& dir )
{
    struct stat info;
    if( stat( dir.c_str(), &info ) != 0 ) {
        return false;
    }

    // Use S_ISDIR, which compares the complete file type field of st_mode. Merely testing
    // `st_mode & S_IFDIR` is not sufficient: on common platforms (e.g., Linux), the type codes
    // of block devices and sockets contain the same bit, and would be reported as directories.
    return S_ISDIR( info.st_mode ) != 0;
}
143 
144 void dir_create( std::string const& path, bool with_parents )
145 {
146  mode_t mode = 0775;
147  struct stat info;
148 
149  // Checks. If it is the current dir, do nothing.
150  auto const path_no_bs = utils::trim_right( path, "/\\");
151  if( path_no_bs.empty() ) {
152  return;
153  }
154 
155  // Run recursively.
156  if( with_parents ) {
157  if( ! dir_exists( path_no_bs ) && path_no_bs.size() > 0 ) {
158  dir_create( file_path( path_no_bs ), true );
159  }
160  }
161 
162  // Try to make dir.
163  if( stat (path.c_str(), &info) != 0 ) {
164  if( mkdir( path.c_str(), mode ) != 0 && errno != EEXIST ) {
165  throw std::runtime_error( "Cannot create directory: " + path );
166  }
167  } else if( !S_ISDIR(info.st_mode) ) {
168  throw std::runtime_error( "Path exists, but is not a directory: " + path );
169  }
170 }
171 
172 std::string dir_normalize_path( std::string const& path )
173 {
174  return utils::trim_right( path, "/") + "/";
175 }
176 
177 std::vector<std::string> dir_list_contents_(
178  std::string const& dir,
179  bool full_path,
180  std::string const& regex,
181  std::function<bool( std::string const& )> condition
182 ) {
183  std::vector<std::string> list;
184  auto const dir_path = dir_normalize_path( dir );
185  std::regex pattern( regex );
186 
187  DIR* dp;
188  struct dirent* dirp;
189 
190  if( ( dp = opendir( dir.c_str() )) == nullptr) {
191  throw std::runtime_error( "Cannot open directory '" + dir + "'." );
192  }
193  while ((dirp = readdir(dp)) != nullptr) {
194  auto const fn = std::string( dirp->d_name );
195 
196  if (fn == "." || fn == "..") {
197  continue;
198  }
199  if( ! regex.empty() && ! regex_match( fn, pattern ) ) {
200  continue;
201  }
202  if( ! condition( dir_path + fn ) ) {
203  continue;
204  }
205 
206  if( full_path ) {
207  list.push_back( dir_path + fn );
208  } else {
209  list.push_back( fn );
210  }
211  }
212  closedir(dp);
213 
214  //~ std::sort(list.begin(), list.end());
215  return list;
216 }
217 
218 std::vector<std::string> dir_list_contents(
219  std::string const& dir,
220  bool full_path,
221  std::string const& regex
222 ) {
223  return dir_list_contents_(
224  dir, full_path, regex,
225  []( std::string const& ){ return true; }
226  );
227 }
228 
229 std::vector<std::string> dir_list_files(
230  std::string const& dir,
231  bool full_path,
232  std::string const& regex
233 ) {
234  return dir_list_contents_(
235  dir, full_path, regex, is_file
236  );
237 }
238 
239 std::vector<std::string> dir_list_directories(
240  std::string const& dir,
241  bool full_path,
242  std::string const& regex
243 ) {
244  return dir_list_contents_(
245  dir, full_path, regex, is_dir
246  );
247 }
248 
249 // =================================================================================================
250 // File Information
251 // =================================================================================================
252 
253 std::unordered_map<std::string, std::string> file_info( std::string const& filename )
254 {
255  std::string basename = file_basename(filename);
256  std::unordered_map<std::string, std::string> res;
257 
258  res["path"] = file_path(filename);
259  res["basename"] = basename;
260  res["filename"] = file_filename(basename);
261  res["extension"] = file_extension(basename);
262 
263  return res;
264 }
265 
/**
 * @brief Return the size of a file.
 *
 * The size is the stream position after opening the file in binary mode, positioned at its end.
 *
 * @param filename Path of the file.
 * @return Size of the file, as reported by the end position of the stream.
 * @throws std::runtime_error if the file cannot be opened, or if its end position cannot be
 *                            determined. Without this check, the failure value -1 of tellg()
 *                            would be returned as a huge bogus size.
 */
size_t file_size( std::string const& filename )
{
    std::ifstream in( filename, std::ifstream::ate | std::ifstream::binary );
    if( in.fail() ) {
        throw std::runtime_error( "Cannot read from file '" + filename + "'." );
    }

    std::streamoff const length = in.tellg();
    if( length < 0 ) {
        throw std::runtime_error( "Cannot determine size of file '" + filename + "'." );
    }
    return static_cast<size_t>( length );
}
272 
/**
 * @brief Return the path leading to a file.
 *
 * @param filename File name, possibly including a leading path.
 * @return Everything before the last slash or backslash, without that separator.
 *         Empty if the input contains neither.
 */
std::string file_path( std::string const& filename )
{
    auto const sep = filename.find_last_of( "\\/" );
    return ( sep == std::string::npos ) ? std::string() : filename.substr( 0, sep );
}
284 
/**
 * @brief Remove directory name from file name if present.
 *
 * @param filename File name, possibly including a leading path.
 * @return Everything after the last slash or backslash. If the input contains neither,
 *         it is returned unchanged.
 */
std::string file_basename( std::string const& filename )
{
    auto const sep = filename.find_last_of( "\\/" );
    if( sep == std::string::npos ) {
        return filename;
    }
    return filename.substr( sep + 1 );
}
295 
/**
 * @brief Remove extension if present.
 *
 * @param filename File name to strip.
 * @return Everything before the last dot. If there is no dot, or if the only candidate is a
 *         dot at the very first position, the input is returned unchanged.
 */
std::string file_filename( std::string const& filename )
{
    auto const dot = filename.rfind( '.' );
    bool const has_extension = ( dot != std::string::npos ) && ( dot != 0 );
    return has_extension ? filename.substr( 0, dot ) : filename;
}
306 
/**
 * @brief Return the extension name of a file.
 *
 * @param filename File name to inspect.
 * @return Everything after the last dot. Note that if there is no dot, or if the only candidate
 *         is a dot at the very first position, the whole input is returned unchanged, and not
 *         an empty string.
 */
std::string file_extension( std::string const& filename )
{
    auto const dot = filename.rfind( '.' );
    bool const has_extension = ( dot != std::string::npos ) && ( dot != 0 );
    return has_extension ? filename.substr( dot + 1 ) : filename;
}
317 
318 // =================================================================================================
319 // File Names
320 // =================================================================================================
321 
322 bool is_valid_filname( std::string const& filename )
323 {
324  // No empty filenames.
325  if( trim( filename ) == "" ) {
326  return false;
327  }
328 
329  // No space at beginning or end.
330  if( starts_with( filename, " " ) || ends_with( filename, " " )) {
331  return false;
332  }
333 
334  // Check forbidden chars of Win and Unix systems.
335  if( filename.find_first_of( "<>:\"\\/|?*" ) != std::string::npos ) {
336  return false;
337  }
338 
339  // Check for non-printable chars.
340  // They might be allowed on most systems, but better be conservative here.
341  for( auto c : filename ) {
342  if( ! isprint( c ) ) {
343  return false;
344  }
345  }
346 
347  return true;
348 }
349 
350 std::string sanitize_filname( std::string const& filename )
351 {
352  // Prepare result.
353  std::string result = "";
354  result.reserve( filename.size() );
355 
356  // Copy all printable chars, drop the others.
357  for( auto c : filename ) {
358  if( isprint( c ) ) {
359  result += c;
360  }
361  }
362 
363  // No spaces around the name, and replace all forbidden chars by underscores.
364  result = trim( result );
365  result = replace_all_chars( result, "<>:\"\\/|?*", '_' );
366 
367  if( result == "" ) {
368  throw std::runtime_error( "Invalid filename." );
369  }
370 
371  return result;
372 }
373 
374 } // namespace utils
375 } // namespace genesis
bool is_file(std::string const &path)
Return true iff the provided path is a file.
Definition: fs.cpp:61
bool path_exists(std::string const &path)
Return whether a path exists, i.e., is a file or directory.
Definition: fs.cpp:55
utils::Range< IteratorPath< TreeLink const, TreeNode const, TreeEdge const > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:325
Provides some valuable algorithms that are not part of the C++ 11 STL.
bool starts_with(std::string const &text, std::string const &start)
Return whether a string starts with another string.
Definition: string.cpp:73
std::string trim(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with trimmed white spaces.
Definition: string.cpp:311
void file_output_stream(std::string const &filename, std::ofstream &out_stream, std::ios_base::openmode mode=std::ios_base::out)
Helper function to obtain an output stream to a file.
std::string dir_normalize_path(std::string const &path)
Normalize a dir name, i.e., make sure that the given path ends with exactly one slash.
Definition: fs.cpp:172
std::string file_filename(std::string const &filename)
Remove extension if present.
Definition: fs.cpp:296
bool file_exists(std::string const &filename)
Return true iff the file exists.
Definition: fs.cpp:66
std::string trim_right(std::string const &s, std::string const &delimiters)
Return a copy of the input string, with right trimmed white spaces.
Definition: string.cpp:287
bool dir_exists(std::string const &dir)
Return true iff the directory exists.
Definition: fs.cpp:126
std::vector< std::string > dir_list_contents(std::string const &dir, bool full_path, std::string const &regex)
Get a list of files and directories in a directory.
Definition: fs.cpp:218
std::vector< std::string > dir_list_directories(std::string const &dir, bool full_path, std::string const &regex)
Get a list of directories in a directory.
Definition: fs.cpp:239
std::unordered_map< std::string, std::string > file_info(std::string const &filename)
Return information about a file.
Definition: fs.cpp:253
std::string replace_all_chars(std::string const &text, std::string const &search_chars, char replace)
Replace all occurrences of the search_chars in text by the replace char.
Definition: string.cpp:273
std::vector< std::string > dir_list_files(std::string const &dir, bool full_path, std::string const &regex)
Get a list of files in a directory.
Definition: fs.cpp:229
bool is_valid_filname(std::string const &filename)
Check whether a file name is valid.
Definition: fs.cpp:322
size_t file_size(std::string const &filename)
Return the size of a file.
Definition: fs.cpp:266
std::string file_path(std::string const &filename)
Return the path leading to a file.
Definition: fs.cpp:273
void dir_create(std::string const &path, bool with_parents)
Create a directory.
Definition: fs.cpp:144
std::string sanitize_filname(std::string const &filename)
Remove or replace all invalid parts of a filename.
Definition: fs.cpp:350
std::vector< std::string > dir_list_contents_(std::string const &dir, bool full_path, std::string const &regex, std::function< bool(std::string const &)> condition)
Definition: fs.cpp:177
std::string file_basename(std::string const &filename)
Remove directory name from file name if present.
Definition: fs.cpp:285
Provides some commonly used string utility functions.
Provides functions for accessing the file system.
bool ends_with(std::string const &text, std::string const &ending)
Return whether a string ends with another string.
Definition: string.cpp:81
std::string file_read(std::string const &filename)
Return the contents of a file as a string.
Definition: fs.cpp:78
bool is_dir(std::string const &path)
Return true iff the provided path is a directory.
Definition: fs.cpp:121
void file_append(std::string const &content, std::string const &filename)
Append the content of a string to a file.
Definition: fs.cpp:105
std::string file_extension(std::string const &filename)
Return the extension name of a file.
Definition: fs.cpp:307
void file_write(std::string const &content, std::string const &filename)
Write the content of a string to a file.
Definition: fs.cpp:96