Skip to content

Commit b39a477

Browse files
ryanofsky and kiminuo committed
refactor: Add fs::PathToString, fs::PathFromString, u8string, u8path functions
There is no change in behavior. This just helps prepare for the transition from the boost::filesystem to the std::filesystem path implementation. Co-authored-by: Kiminuo <[email protected]>
1 parent 113b863 commit b39a477

File tree

2 files changed

+104
-1
lines changed

2 files changed

+104
-1
lines changed

src/fs.h

Lines changed: 77 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,83 @@
1515
#include <boost/filesystem/fstream.hpp>
1616

1717
/** Filesystem operations and types */
18-
namespace fs = boost::filesystem;
18+
namespace fs {
19+
20+
using namespace boost::filesystem;
21+
22+
/**
23+
* Path class wrapper to prepare application code for transition from
24+
* boost::filesystem library to std::filesystem implementation. The main
25+
* purpose of the class is to define fs::path::u8string() and fs::u8path()
26+
* functions not present in boost. In the next git commit, it also blocks calls
27+
* to the fs::path(std::string) implicit constructor and the fs::path::string()
28+
* method, which worked well in the boost::filesystem implementation, but have
29+
* unsafe and unpredictable behavior on Windows in the std::filesystem
30+
* implementation (see implementation note in \ref PathToString for details).
31+
*/
32+
class path : public boost::filesystem::path
33+
{
34+
public:
35+
using boost::filesystem::path::path;
36+
path(boost::filesystem::path path) : boost::filesystem::path::path(std::move(path)) {}
37+
38+
// Define UTF-8 string conversion method not present in boost::filesystem but present in std::filesystem.
39+
std::string u8string() const { return boost::filesystem::path::string(); }
40+
};
41+
42+
// Define UTF-8 string conversion function not present in boost::filesystem but present in std::filesystem.
43+
static inline path u8path(const std::string& string)
{
    // Build the boost path first, then wrap it explicitly in the fs::path wrapper.
    return path(boost::filesystem::path(string));
}
47+
48+
/**
49+
* Convert path object to byte string. On POSIX, paths natively are byte
50+
* strings so this is trivial. On Windows, paths natively are Unicode, so an
51+
* encoding step is necessary.
52+
*
53+
* The inverse of \ref PathToString is \ref PathFromString. The strings
54+
* returned and parsed by these functions can be used to call POSIX APIs, and
55+
* for roundtrip conversion, logging, and debugging. But they are not
56+
* guaranteed to be valid UTF-8, and are generally meant to be used internally,
57+
* not externally. When communicating with external programs and libraries that
58+
* require UTF-8, fs::path::u8string() and fs::u8path() methods can be used.
59+
* For other applications, if support for non UTF-8 paths is required, or if
60+
* higher-level JSON or XML or URI or C-style escapes are preferred, it may be
61+
* also appropriate to use different path encoding functions.
62+
*
63+
* Implementation note: On Windows, the std::filesystem::path(string)
64+
* constructor and std::filesystem::path::string() method are not safe to use
65+
* here, because these methods encode the path using C++'s narrow multibyte
66+
* encoding, which on Windows corresponds to the current "code page", which is
67+
* unpredictable and typically not able to represent all valid paths. So
68+
* std::filesystem::path::u8string() and std::filesystem::u8path() functions
69+
* are used instead on Windows. On POSIX, u8string/u8path functions are not
70+
* safe to use because paths are not always valid UTF-8, so plain string
71+
* methods which do not transform the path there are used.
72+
*/
73+
static inline std::string PathToString(const path& path)
74+
{
75+
#ifdef WIN32
76+
return path.u8string();
77+
#else
78+
static_assert(std::is_same<path::string_type, std::string>::value, "PathToString not implemented on this platform");
79+
return path.boost::filesystem::path::string();
80+
#endif
81+
}
82+
83+
/**
84+
* Convert byte string to path object. Inverse of \ref PathToString.
85+
*/
86+
static inline path PathFromString(const std::string& string)
{
#ifndef WIN32
    // Non-Windows: construct the boost path straight from the byte string.
    return path(boost::filesystem::path(string));
#else
    // Windows: go through the UTF-8 conversion function.
    return u8path(string);
#endif
}
94+
} // namespace fs
1995

2096
/** Bridge operations to C stdio */
2197
namespace fsbridge {

src/test/fs_tests.cpp

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,33 @@
1111

1212
BOOST_FIXTURE_TEST_SUITE(fs_tests, BasicTestingSetup)
1313

14+
BOOST_AUTO_TEST_CASE(fsbridge_pathtostring)
{
    const std::string u8_str("fs_tests_₿_🏃");

    // Every pairing of {PathFromString, u8path} with {PathToString, u8string}
    // must hand back the original string.
    BOOST_CHECK_EQUAL(fs::PathToString(fs::PathFromString(u8_str)), u8_str);
    BOOST_CHECK_EQUAL(fs::u8path(u8_str).u8string(), u8_str);
    BOOST_CHECK_EQUAL(fs::PathFromString(u8_str).u8string(), u8_str);
    BOOST_CHECK_EQUAL(fs::PathToString(fs::u8path(u8_str)), u8_str);

#ifndef WIN32
    // Outside Windows, paths are plain byte strings and PathToString /
    // PathFromString perform no encoding, so a byte string that is not valid
    // UTF-8 must survive a round trip unchanged. On Windows, paths are
    // Unicode and these functions encode and decode, so the outcome of this
    // check would be undefined there.
    const std::string invalid_u8_str("\xf0");
    BOOST_CHECK_EQUAL(invalid_u8_str.size(), 1);
    BOOST_CHECK_EQUAL(fs::PathToString(fs::PathFromString(invalid_u8_str)), invalid_u8_str);
#endif
}
33+
34+
BOOST_AUTO_TEST_CASE(fsbridge_stem)
{
    // stem() on a non-ASCII file name must drop only the ".dat" extension.
    const std::string test_filename("fs_tests_₿_🏃.dat");
    const std::string expected_stem("fs_tests_₿_🏃");
    BOOST_CHECK_EQUAL(fs::PathToString(fs::PathFromString(test_filename).stem()), expected_stem);
}
40+
1441
BOOST_AUTO_TEST_CASE(fsbridge_fstream)
1542
{
1643
fs::path tmpfolder = m_args.GetDataDirBase();

0 commit comments

Comments
 (0)