Skip to content

Commit 1884ce2

Browse files
committed
Merge bitcoin/bitcoin#22937: refactor: Forbid calling unsafe fs::path(std::string) constructor and fs::path::string() method
6544ea5 refactor: Block unsafe fs::path std::string conversion calls (Russell Yanofsky) b39a477 refactor: Add fs::PathToString, fs::PathFromString, u8string, u8path functions (Russell Yanofsky) Pull request description: The `fs::path` class has a `std::string` constructor which will implicitly convert from strings. Implicit conversions like this are not great in general because they can hide complexity and inefficiencies in the code, but this case is especially bad, because after the transition from `boost::filesystem` to `std::filesystem` in #20744 the behavior of this constructor on windows will be more complicated and can mangle path strings. The `fs::path` class also has a `.string()` method which is inverse of the constructor and has the same problems. Fix this by replacing the unsafe method calls with `PathToString` and `PathFromString` function calls, and by forbidding unsafe method calls in the future. ACKs for top commit: kiminuo: ACK 6544ea5 laanwj: Code review ACK 6544ea5 hebasto: re-ACK 6544ea5, only added `fsbridge_stem` test case, updated comment, and rebased since my [previous](bitcoin/bitcoin#22937 (review)) review. Verified with the following command: Tree-SHA512: c36324740eb4ee55151146626166c00d5ccc4b6f3df777e75c112bcb4d1db436c1d9cc8c29a1e7fb96051457d317961ab42e6c380c3be2771d135771b2b49fa0
2 parents 6419bdf + 6544ea5 commit 1884ce2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+346
-195
lines changed

src/addrdb.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
5858
if (fileout.IsNull()) {
5959
fileout.fclose();
6060
remove(pathTmp);
61-
return error("%s: Failed to open file %s", __func__, pathTmp.string());
61+
return error("%s: Failed to open file %s", __func__, fs::PathToString(pathTmp));
6262
}
6363

6464
// Serialize
@@ -70,7 +70,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
7070
if (!FileCommit(fileout.Get())) {
7171
fileout.fclose();
7272
remove(pathTmp);
73-
return error("%s: Failed to flush file %s", __func__, pathTmp.string());
73+
return error("%s: Failed to flush file %s", __func__, fs::PathToString(pathTmp));
7474
}
7575
fileout.fclose();
7676

@@ -122,8 +122,8 @@ void DeserializeFileDB(const fs::path& path, Data& data, int version)
122122
} // namespace
123123

124124
CBanDB::CBanDB(fs::path ban_list_path)
125-
: m_banlist_dat(ban_list_path.string() + ".dat"),
126-
m_banlist_json(ban_list_path.string() + ".json")
125+
: m_banlist_dat(ban_list_path + ".dat"),
126+
m_banlist_json(ban_list_path + ".json")
127127
{
128128
}
129129

@@ -143,7 +143,7 @@ bool CBanDB::Write(const banmap_t& banSet)
143143
bool CBanDB::Read(banmap_t& banSet)
144144
{
145145
if (fs::exists(m_banlist_dat)) {
146-
LogPrintf("banlist.dat ignored because it can only be read by " PACKAGE_NAME " version 22.x. Remove %s to silence this warning.\n", m_banlist_dat);
146+
LogPrintf("banlist.dat ignored because it can only be read by " PACKAGE_NAME " version 22.x. Remove %s to silence this warning.\n", fs::quoted(fs::PathToString(m_banlist_dat)));
147147
}
148148
// If the JSON banlist does not exist, then recreate it
149149
if (!fs::exists(m_banlist_json)) {
@@ -155,15 +155,15 @@ bool CBanDB::Read(banmap_t& banSet)
155155

156156
if (!util::ReadSettings(m_banlist_json, settings, errors)) {
157157
for (const auto& err : errors) {
158-
LogPrintf("Cannot load banlist %s: %s\n", m_banlist_json.string(), err);
158+
LogPrintf("Cannot load banlist %s: %s\n", fs::PathToString(m_banlist_json), err);
159159
}
160160
return false;
161161
}
162162

163163
try {
164164
BanMapFromJson(settings[JSON_KEY], banSet);
165165
} catch (const std::runtime_error& e) {
166-
LogPrintf("Cannot parse banlist %s: %s\n", m_banlist_json.string(), e.what());
166+
LogPrintf("Cannot parse banlist %s: %s\n", fs::PathToString(m_banlist_json), e.what());
167167
return false;
168168
}
169169

@@ -194,12 +194,12 @@ std::optional<bilingual_str> LoadAddrman(const std::vector<bool>& asmap, const A
194194
} catch (const DbNotFoundError&) {
195195
// Addrman can be in an inconsistent state after failure, reset it
196196
addrman = std::make_unique<AddrMan>(asmap, /* deterministic */ false, /* consistency_check_ratio */ check_addrman);
197-
LogPrintf("Creating peers.dat because the file was not found (%s)\n", path_addr);
197+
LogPrintf("Creating peers.dat because the file was not found (%s)\n", fs::quoted(fs::PathToString(path_addr)));
198198
DumpPeerAddresses(args, *addrman);
199199
} catch (const std::exception& e) {
200200
addrman = nullptr;
201201
return strprintf(_("Invalid or corrupt peers.dat (%s). If you believe this is a bug, please report it to %s. As a workaround, you can move the file (%s) out of the way (rename, move, or delete) to have a new one created on the next start."),
202-
e.what(), PACKAGE_BUGREPORT, path_addr);
202+
e.what(), PACKAGE_BUGREPORT, fs::quoted(fs::PathToString(path_addr)));
203203
}
204204
return std::nullopt;
205205
}
@@ -215,7 +215,7 @@ std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path)
215215
std::vector<CAddress> anchors;
216216
try {
217217
DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT);
218-
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename());
218+
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), fs::quoted(fs::PathToString(anchors_db_path.filename())));
219219
} catch (const std::exception&) {
220220
anchors.clear();
221221
}

src/bitcoin-cli.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ static UniValue CallRPC(BaseRequestHandler* rh, const std::string& strMethod, co
786786
if (failedToGetAuthCookie) {
787787
throw std::runtime_error(strprintf(
788788
"Could not locate RPC credentials. No authentication cookie could be found, and RPC password is not set. See -rpcpassword and -stdinrpcpass. Configuration file: (%s)",
789-
GetConfigFile(gArgs.GetArg("-conf", BITCOIN_CONF_FILENAME)).string()));
789+
fs::PathToString(GetConfigFile(gArgs.GetArg("-conf", BITCOIN_CONF_FILENAME)))));
790790
} else {
791791
throw std::runtime_error("Authorization failed: Incorrect rpcuser or rpcpassword");
792792
}

src/dbwrapper.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static leveldb::Options GetOptions(size_t nCacheSize)
115115
}
116116

117117
CDBWrapper::CDBWrapper(const fs::path& path, size_t nCacheSize, bool fMemory, bool fWipe, bool obfuscate)
118-
: m_name{path.stem().string()}
118+
: m_name{fs::PathToString(path.stem())}
119119
{
120120
penv = nullptr;
121121
readoptions.verify_checksums = true;
@@ -129,21 +129,21 @@ CDBWrapper::CDBWrapper(const fs::path& path, size_t nCacheSize, bool fMemory, bo
129129
options.env = penv;
130130
} else {
131131
if (fWipe) {
132-
LogPrintf("Wiping LevelDB in %s\n", path.string());
133-
leveldb::Status result = leveldb::DestroyDB(path.string(), options);
132+
LogPrintf("Wiping LevelDB in %s\n", fs::PathToString(path));
133+
leveldb::Status result = leveldb::DestroyDB(fs::PathToString(path), options);
134134
dbwrapper_private::HandleError(result);
135135
}
136136
TryCreateDirectories(path);
137-
LogPrintf("Opening LevelDB in %s\n", path.string());
137+
LogPrintf("Opening LevelDB in %s\n", fs::PathToString(path));
138138
}
139-
leveldb::Status status = leveldb::DB::Open(options, path.string(), &pdb);
139+
leveldb::Status status = leveldb::DB::Open(options, fs::PathToString(path), &pdb);
140140
dbwrapper_private::HandleError(status);
141141
LogPrintf("Opened LevelDB successfully\n");
142142

143143
if (gArgs.GetBoolArg("-forcecompactdb", false)) {
144-
LogPrintf("Starting database compaction of %s\n", path.string());
144+
LogPrintf("Starting database compaction of %s\n", fs::PathToString(path));
145145
pdb->CompactRange(nullptr, nullptr);
146-
LogPrintf("Finished database compaction of %s\n", path.string());
146+
LogPrintf("Finished database compaction of %s\n", fs::PathToString(path));
147147
}
148148

149149
// The base-case obfuscation key, which is a noop.
@@ -160,10 +160,10 @@ CDBWrapper::CDBWrapper(const fs::path& path, size_t nCacheSize, bool fMemory, bo
160160
Write(OBFUSCATE_KEY_KEY, new_key);
161161
obfuscate_key = new_key;
162162

163-
LogPrintf("Wrote new obfuscate key for %s: %s\n", path.string(), HexStr(obfuscate_key));
163+
LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(path), HexStr(obfuscate_key));
164164
}
165165

166-
LogPrintf("Using obfuscation key for %s: %s\n", path.string(), HexStr(obfuscate_key));
166+
LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(path), HexStr(obfuscate_key));
167167
}
168168

169169
CDBWrapper::~CDBWrapper()

src/flatfile.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ FILE* FlatFileSeq::Open(const FlatFilePos& pos, bool read_only)
4141
if (!file && !read_only)
4242
file = fsbridge::fopen(path, "wb+");
4343
if (!file) {
44-
LogPrintf("Unable to open file %s\n", path.string());
44+
LogPrintf("Unable to open file %s\n", fs::PathToString(path));
4545
return nullptr;
4646
}
4747
if (pos.nPos && fseek(file, pos.nPos, SEEK_SET)) {
48-
LogPrintf("Unable to seek to position %u of %s\n", pos.nPos, path.string());
48+
LogPrintf("Unable to seek to position %u of %s\n", pos.nPos, fs::PathToString(path));
4949
fclose(file);
5050
return nullptr;
5151
}

src/fs.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace fsbridge {
2424
FILE *fopen(const fs::path& p, const char *mode)
2525
{
2626
#ifndef WIN32
27-
return ::fopen(p.string().c_str(), mode);
27+
return ::fopen(p.c_str(), mode);
2828
#else
2929
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>,wchar_t> utf8_cvt;
3030
return ::_wfopen(p.wstring().c_str(), utf8_cvt.from_bytes(mode).c_str());
@@ -46,7 +46,7 @@ static std::string GetErrorReason()
4646

4747
FileLock::FileLock(const fs::path& file)
4848
{
49-
fd = open(file.string().c_str(), O_RDWR);
49+
fd = open(file.c_str(), O_RDWR);
5050
if (fd == -1) {
5151
reason = GetErrorReason();
5252
}
@@ -249,9 +249,9 @@ void ofstream::close()
249249
#else // __GLIBCXX__
250250

251251
#if BOOST_VERSION >= 107700
252-
static_assert(sizeof(*BOOST_FILESYSTEM_C_STR(fs::path())) == sizeof(wchar_t),
252+
static_assert(sizeof(*BOOST_FILESYSTEM_C_STR(boost::filesystem::path())) == sizeof(wchar_t),
253253
#else
254-
static_assert(sizeof(*fs::path().BOOST_FILESYSTEM_C_STR) == sizeof(wchar_t),
254+
static_assert(sizeof(*boost::filesystem::path().BOOST_FILESYSTEM_C_STR) == sizeof(wchar_t),
255255
#endif // BOOST_VERSION >= 107700
256256
"Warning: This build is using boost::filesystem ofstream and ifstream "
257257
"implementations which will fail to open paths containing multibyte "

src/fs.h

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,132 @@
1313

1414
#include <boost/filesystem.hpp>
1515
#include <boost/filesystem/fstream.hpp>
16+
#include <tinyformat.h>
1617

1718
/** Filesystem operations and types */
18-
namespace fs = boost::filesystem;
19+
namespace fs {
20+
21+
using namespace boost::filesystem;
22+
23+
/**
24+
* Path class wrapper to prepare application code for transition from
25+
* boost::filesystem library to std::filesystem implementation. The main
26+
* purpose of the class is to define fs::path::u8string() and fs::u8path()
27+
* functions not present in boost. It also blocks calls to the
28+
* fs::path(std::string) implicit constructor and the fs::path::string()
29+
* method, which worked well in the boost::filesystem implementation, but have
30+
* unsafe and unpredictable behavior on Windows in the std::filesystem
31+
* implementation (see implementation note in \ref PathToString for details).
32+
*/
33+
class path : public boost::filesystem::path
34+
{
35+
public:
36+
using boost::filesystem::path::path;
37+
38+
// Allow path objects arguments for compatibility.
39+
path(boost::filesystem::path path) : boost::filesystem::path::path(std::move(path)) {}
40+
path& operator=(boost::filesystem::path path) { boost::filesystem::path::operator=(std::move(path)); return *this; }
41+
path& operator/=(boost::filesystem::path path) { boost::filesystem::path::operator/=(std::move(path)); return *this; }
42+
43+
// Allow literal string arguments, which are safe as long as the literals are ASCII.
44+
path(const char* c) : boost::filesystem::path(c) {}
45+
path& operator=(const char* c) { boost::filesystem::path::operator=(c); return *this; }
46+
path& operator/=(const char* c) { boost::filesystem::path::operator/=(c); return *this; }
47+
path& append(const char* c) { boost::filesystem::path::append(c); return *this; }
48+
49+
// Disallow std::string arguments to avoid locale-dependent decoding on windows.
50+
path(std::string) = delete;
51+
path& operator=(std::string) = delete;
52+
path& operator/=(std::string) = delete;
53+
path& append(std::string) = delete;
54+
55+
// Disallow std::string conversion method to avoid locale-dependent encoding on windows.
56+
std::string string() const = delete;
57+
58+
// Define UTF-8 string conversion method not present in boost::filesystem but present in std::filesystem.
59+
std::string u8string() const { return boost::filesystem::path::string(); }
60+
};
61+
62+
// Define UTF-8 string conversion function not present in boost::filesystem but present in std::filesystem.
63+
static inline path u8path(const std::string& string)
64+
{
65+
return boost::filesystem::path(string);
66+
}
67+
68+
// Disallow implicit std::string conversion for system_complete to avoid
69+
// locale-dependent encoding on windows.
70+
static inline path system_complete(const path& p)
71+
{
72+
return boost::filesystem::system_complete(p);
73+
}
74+
75+
// Disallow implicit std::string conversion for exists to avoid
76+
// locale-dependent encoding on windows.
77+
static inline bool exists(const path& p)
78+
{
79+
return boost::filesystem::exists(p);
80+
}
81+
82+
// Allow explicit quoted stream I/O.
83+
static inline auto quoted(const std::string& s)
84+
{
85+
return boost::io::quoted(s, '&');
86+
}
87+
88+
// Allow safe path append operations.
89+
static inline path operator+(path p1, path p2)
90+
{
91+
p1 += std::move(p2);
92+
return p1;
93+
}
94+
95+
/**
96+
* Convert path object to byte string. On POSIX, paths natively are byte
97+
* strings so this is trivial. On Windows, paths natively are Unicode, so an
98+
* encoding step is necessary.
99+
*
100+
* The inverse of \ref PathToString is \ref PathFromString. The strings
101+
* returned and parsed by these functions can be used to call POSIX APIs, and
102+
* for roundtrip conversion, logging, and debugging. But they are not
103+
* guaranteed to be valid UTF-8, and are generally meant to be used internally,
104+
* not externally. When communicating with external programs and libraries that
105+
* require UTF-8, fs::path::u8string() and fs::u8path() methods can be used.
106+
* For other applications, if support for non UTF-8 paths is required, or if
107+
* higher-level JSON or XML or URI or C-style escapes are preferred, it may be
108+
* also be appropriate to use different path encoding functions.
109+
*
110+
* Implementation note: On Windows, the std::filesystem::path(string)
111+
* constructor and std::filesystem::path::string() method are not safe to use
112+
* here, because these methods encode the path using C++'s narrow multibyte
113+
* encoding, which on Windows corresponds to the current "code page", which is
114+
* unpredictable and typically not able to represent all valid paths. So
115+
* std::filesystem::path::u8string() and std::filesystem::u8path() functions
116+
* are used instead on Windows. On POSIX, u8string/u8path functions are not
117+
* safe to use because paths are not always valid UTF-8, so plain string
118+
* methods which do not transform the path there are used.
119+
*/
120+
static inline std::string PathToString(const path& path)
121+
{
122+
#ifdef WIN32
123+
return path.u8string();
124+
#else
125+
static_assert(std::is_same<path::string_type, std::string>::value, "PathToString not implemented on this platform");
126+
return path.boost::filesystem::path::string();
127+
#endif
128+
}
129+
130+
/**
131+
* Convert byte string to path object. Inverse of \ref PathToString.
132+
*/
133+
static inline path PathFromString(const std::string& string)
134+
{
135+
#ifdef WIN32
136+
return u8path(string);
137+
#else
138+
return boost::filesystem::path(string);
139+
#endif
140+
}
141+
} // namespace fs
19142

20143
/** Bridge operations to C stdio */
21144
namespace fsbridge {
@@ -103,4 +226,11 @@ namespace fsbridge {
103226
#endif // WIN32 && __GLIBCXX__
104227
};
105228

229+
// Disallow path operator<< formatting in tinyformat to avoid locale-dependent
230+
// encoding on windows.
231+
namespace tinyformat {
232+
template<> inline void formatValue(std::ostream&, const char*, const char*, int, const boost::filesystem::path&) = delete;
233+
template<> inline void formatValue(std::ostream&, const char*, const char*, int, const fs::path&) = delete;
234+
} // namespace tinyformat
235+
106236
#endif // BITCOIN_FS_H

src/i2p.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ void Session::GenerateAndSavePrivateKey(const Sock& sock)
328328
if (!WriteBinaryFile(m_private_key_file,
329329
std::string(m_private_key.begin(), m_private_key.end()))) {
330330
throw std::runtime_error(
331-
strprintf("Cannot save I2P private key to %s", m_private_key_file));
331+
strprintf("Cannot save I2P private key to %s", fs::quoted(fs::PathToString(m_private_key_file))));
332332
}
333333
}
334334

0 commit comments

Comments
 (0)