-
Notifications
You must be signed in to change notification settings - Fork 13.9k
Fix non-ASCII path handling in common argument parser on Windows #16611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Fix non-ASCII path handling in common argument parser on Windows #16611
Conversation
| #ifdef _WIN32 | ||
| struct utf8_argv { | ||
| std::vector<std::string> buf; | ||
| std::vector<char*> ptrs; | ||
| }; | ||
|
|
||
| static utf8_argv make_utf8_argv() { | ||
| utf8_argv out; | ||
| int wargc = 0; | ||
| LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc); | ||
| if (!wargv) return out; | ||
|
|
||
| out.buf.reserve(wargc); | ||
| for (int i = 0; i < wargc; ++i) { | ||
| int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr); | ||
| if (n <= 0) { out.buf.emplace_back(); continue; } | ||
| auto& s = out.buf.emplace_back(); | ||
| s.resize(static_cast<size_t>(n - 1)); | ||
| (void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr); | ||
| } | ||
| LocalFree(wargv); | ||
|
|
||
| out.ptrs.reserve(out.buf.size() + 1); | ||
| for (auto& s : out.buf) out.ptrs.push_back(s.data()); | ||
| out.ptrs.push_back(nullptr); | ||
| return out; | ||
| } | ||
| #endif | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| #ifdef _WIN32 | |
| struct utf8_argv { | |
| std::vector<std::string> buf; | |
| std::vector<char*> ptrs; | |
| }; | |
| static utf8_argv make_utf8_argv() { | |
| utf8_argv out; | |
| int wargc = 0; | |
| LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc); | |
| if (!wargv) return out; | |
| out.buf.reserve(wargc); | |
| for (int i = 0; i < wargc; ++i) { | |
| int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr); | |
| if (n <= 0) { out.buf.emplace_back(); continue; } | |
| auto& s = out.buf.emplace_back(); | |
| s.resize(static_cast<size_t>(n - 1)); | |
| (void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr); | |
| } | |
| LocalFree(wargv); | |
| out.ptrs.reserve(out.buf.size() + 1); | |
| for (auto& s : out.buf) out.ptrs.push_back(s.data()); | |
| out.ptrs.push_back(nullptr); | |
| return out; | |
| } | |
| #endif | |
| #ifdef _WIN32 | |
| struct utf8_argv { | |
| std::vector<std::string> buf; | |
| std::vector<char *> ptrs; | |
| }; | |
| static utf8_argv make_utf8_argv() { | |
| utf8_argv out; | |
| int wargc = 0; | |
| LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &wargc); | |
| if (!wargv) return out; | |
| out.buf.reserve(wargc); | |
| for (int i = 0; i < wargc; ++i) { | |
| int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr); | |
| if (n <= 0) { out.buf.emplace_back(); continue; } | |
| auto & s = out.buf.emplace_back(); | |
| s.resize(static_cast<size_t>(n - 1)); | |
| (void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr); | |
| } | |
| LocalFree(wargv); | |
| out.ptrs.reserve(out.buf.size() + 1); | |
| for (auto & s : out.buf) out.ptrs.push_back(s.data()); | |
| out.ptrs.push_back(nullptr); | |
| return out; | |
| } | |
| #endif | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I checked the line endings of the code committed to the repository just in case, and they appear to be LF, consistent with the rest of the files.
|
Docker Pull fails for some actions |
Summary
Fixed the behavior of the llama.cpp general argument parser when given non-ASCII paths.
On Windows, argv strings are received in the system’s ANSI code page (ACP), so conversion to UTF-8 is required.
Comparison of behavior
Windows 11 Pro 24H2
b6756
This PR
Note
This PR is related to
Reproducing the issue with non-ASCII .mmproj paths in existing CLI tools requires first applying the changes from this PR. (This doesn’t apply when the tools are called internally.)