Skip to content

Commit d94ee30

Browse files
committed
Fix canonical_path::parent in the presence of non-ASCII
On Windows, if a canonical_path stores non-ASCII characters, the counting used in canonical_path::parent was inaccurate and caused path components to be truncated incorrectly. Fix counting to make canonical_path::parent work as expected. canonicalize_path still has encoding-related bugs on Windows. These will be fixed in future commits.
1 parent 3dc15ef commit d94ee30

File tree

2 files changed

+21
-15
lines changed

2 files changed

+21
-15
lines changed

src/file-canonical.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,17 +115,16 @@ canonical_path::canonical_path(std::string &&path) : path_(std::move(path)) {
115115
}
116116
std::reverse(this->path_lengths_.begin(), this->path_lengths_.end());
117117
#elif QLJS_PATHS_WIN32
118-
// TODO(strager): path_lengths_ is in UTF-16 code units, but it's interpreted
119-
// as UTF-8 code units! Fix by storing a std::wstring in canonical_path
120-
// instead of a std::string.
121118
// TODO(strager): Avoid string conversions and copying.
122119
std::optional<std::wstring> wpath = mbstring_to_wstring(this->path_.c_str());
123120
QLJS_ASSERT(wpath.has_value());
124121
for (;;) {
125122
HRESULT result = ::PathCchRemoveFileSpec(wpath->data(), wpath->size() + 1);
126123
switch (result) {
127124
case S_OK:
128-
this->path_lengths_.push_back(std::wcslen(wpath->data()));
125+
// TODO(strager): Avoid allocating just to count UTF-8 code units.
126+
this->path_lengths_.push_back(
127+
std::filesystem::path(wpath->c_str()).u8string().size());
129128
break;
130129
case S_FALSE:
131130
// Path is a root path already.

test/test-file-canonical.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,22 +1033,29 @@ TEST_F(test_file_canonical, parent_of_root_is_root_win32) {
10331033

10341034
TEST_F(test_file_canonical, parent_of_non_root_removes_component_win32) {
10351035
struct test_case {
1036-
const std::string_view before;
1037-
const std::string_view after;
1036+
const string8_view before;
1037+
const string8_view after;
10381038
};
10391039

10401040
for (const test_case& tc : {
1041-
test_case{R"(C:\hello)", R"(C:\)"},
1042-
test_case{R"(C:\dir\subdir)", R"(C:\dir)"},
1043-
test_case{R"(C:\a\b\c\d)", R"(C:\a\b\c)"},
1044-
test_case{R"(\\?\X:\hello)", R"(\\?\X:\)"},
1045-
test_case{R"(\\?\X:\dir\subdir)", R"(\\?\X:\dir)"},
1046-
test_case{R"(\\?\X:\a\b\c\d)", R"(\\?\X:\a\b\c)"},
1047-
test_case{R"(\\server\share\file)", R"(\\server\share)"},
1041+
test_case{u8R"(C:\hello)", u8R"(C:\)"},
1042+
test_case{u8R"(C:\dir\subdir)", u8R"(C:\dir)"},
1043+
test_case{u8R"(C:\a\b\c\d)", u8R"(C:\a\b\c)"},
1044+
test_case{u8R"(\\?\X:\hello)", u8R"(\\?\X:\)"},
1045+
test_case{u8R"(\\?\X:\dir\subdir)", u8R"(\\?\X:\dir)"},
1046+
test_case{u8R"(\\?\X:\a\b\c\d)", u8R"(\\?\X:\a\b\c)"},
1047+
test_case{u8R"(\\server\share\file)", u8R"(\\server\share)"},
1048+
test_case{u8"X:\\parent\u0080dir\\sub\u0080dir",
1049+
u8"X:\\parent\u0080dir"},
1050+
test_case{u8"X:\\parent\u0800dir\\sub\u0800dir",
1051+
u8"X:\\parent\u0800dir"},
1052+
test_case{u8"X:\\parent\U00108000dir\\sub\U00108000dir",
1053+
u8"X:\\parent\U00108000dir"},
10481054
}) {
1049-
canonical_path p(std::string(tc.before));
1055+
canonical_path p(to_string(tc.before));
10501056
EXPECT_TRUE(p.parent());
1051-
EXPECT_EQ(p.path(), tc.after) << "before = " << tc.before;
1057+
EXPECT_EQ(p.path(), to_string(tc.after))
1058+
<< "before = " << out_string8(tc.before);
10521059
}
10531060
}
10541061

0 commit comments

Comments
 (0)