Skip to content

Commit 6791ccf

Browse files
committed
Fix canonicalization of non-ASCII paths on Windows
On Windows, existing_path_length_ counts UTF-16 code units, but it was being used as an index into a UTF-8 string. This causes problems when canonicalizing paths containing non-ASCII characters. Fix the indexing to be consistent with the data's encoding by converting the length to UTF-8 code units before returning the result.
1 parent 9947942 commit 6791ccf

File tree

3 files changed

+36
-6
lines changed

3 files changed

+36
-6
lines changed

src/file-canonical.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -567,11 +567,14 @@ class windows_path_canonicalizer
567567

568568
canonical_path_result result() {
569569
// HACK(strager): Convert UTF-16 to UTF-8.
570-
// TODO(strager): existing_path_length_ is in UTF-16 code units, but it's
571-
// interpreted as UTF-8 code units! Fix by storing a std::wstring in
572-
// canonical_path.
573-
return canonical_path_result(std::filesystem::path(canonical_).string(),
574-
existing_path_length_);
570+
std::string canonical_utf_8 =
571+
to_string(std::filesystem::path(canonical_).u8string());
572+
std::size_t existing_path_length_utf_8 =
573+
count_utf_8_code_units(std::u16string_view(
574+
reinterpret_cast<const char16_t *>(canonical_.data()),
575+
existing_path_length_));
576+
return canonical_path_result(std::move(canonical_utf_8),
577+
existing_path_length_utf_8);
575578
}
576579

577580
quick_lint_js::result<void, canonicalizing_path_io_error>

src/temporary-directory.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <cstring>
99
#include <iostream>
1010
#include <limits.h>
11+
#include <quick-lint-js/char8.h>
1112
#include <quick-lint-js/have.h>
1213
#include <quick-lint-js/temporary-directory.h>
1314
#include <quick-lint-js/unreachable.h>
@@ -74,7 +75,7 @@ std::string make_temporary_directory() {
7475

7576
void create_directory(const std::string &path) {
7677
#if QLJS_HAVE_STD_FILESYSTEM
77-
std::filesystem::create_directory(path);
78+
std::filesystem::create_directory(to_string8(path));
7879
#else
7980
if (::mkdir(path.c_str(), 0755) != 0) {
8081
std::fprintf(stderr, "error: failed to create directory %s: %s\n",

test/test-file-canonical.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
using ::testing::AnyOf;
3434
using ::testing::HasSubstr;
3535
using ::testing::Not;
36+
using namespace std::literals::string_view_literals;
3637

3738
namespace quick_lint_js {
3839
namespace {
@@ -185,6 +186,31 @@ TEST_F(test_file_canonical,
185186
EXPECT_EQ(canonical->path(), temp_dir_canonical->path());
186187
}
187188

189+
TEST_F(test_file_canonical,
190+
canonical_path_to_non_existing_file_with_non_ascii_succeeds) {
191+
for (string8_view character8 : {u8"\u00e0"sv, u8"\u0800"sv}) {
192+
std::string character = to_string(character8);
193+
SCOPED_TRACE(character);
194+
195+
std::string temp_dir = this->make_temporary_directory();
196+
create_directory(temp_dir + "/parent" + character + "dir");
197+
result<canonical_path_result, canonicalize_path_io_error>
198+
parent_dir_canonical =
199+
canonicalize_path(temp_dir + "/parent" + character + "dir");
200+
ASSERT_TRUE(parent_dir_canonical.ok())
201+
<< parent_dir_canonical.error().to_string();
202+
203+
result<canonical_path_result, canonicalize_path_io_error> canonical =
204+
canonicalize_path(temp_dir + "/parent" + character + "dir/does-not-" +
205+
character + "exist/file" + character + "name.txt");
206+
ASSERT_TRUE(canonical.ok()) << canonical.error().to_string();
207+
208+
EXPECT_TRUE(canonical->have_missing_components());
209+
canonical->drop_missing_components();
210+
EXPECT_EQ(canonical->path(), parent_dir_canonical->path());
211+
}
212+
}
213+
188214
TEST_F(test_file_canonical, canonical_path_with_file_parent_fails) {
189215
std::string temp_dir = this->make_temporary_directory();
190216
write_file(temp_dir + "/file", u8"");

0 commit comments

Comments
 (0)