Skip to content

Commit 7ce090e

Browse files
authored
Handle UTF-8 paths correctly on Windows platform (dmlc#9443)
* Fix round-trip serialization with UTF-8 paths * Add compiler version check * Add comment to C API functions * Add Python tests * [CI] Updatre MacOS deployment target * Use std::filesystem instead of dmlc::TemporaryDirectory
1 parent 97fd520 commit 7ce090e

File tree

6 files changed

+48
-18
lines changed

6 files changed

+48
-18
lines changed

CMakeLists.txt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,24 @@ endif ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUA
1414

1515
message(STATUS "CMake version ${CMAKE_VERSION}")
1616

17-
if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
18-
message(FATAL_ERROR "GCC version must be at least 5.0!")
17+
# Check compiler versions
18+
# Use recent compilers to ensure that std::filesystem is available
19+
if(MSVC)
20+
if(MSVC_VERSION LESS 1920)
21+
message(FATAL_ERROR "Need Visual Studio 2019 or newer to build XGBoost")
22+
endif()
23+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
24+
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.1")
25+
message(FATAL_ERROR "Need GCC 8.1 or newer to build XGBoost")
26+
endif()
27+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
28+
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "11.0")
29+
message(FATAL_ERROR "Need Xcode 11.0 (AppleClang 11.0) or newer to build XGBoost")
30+
endif()
31+
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
32+
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
33+
message(FATAL_ERROR "Need Clang 9.0 or newer to build XGBoost")
34+
endif()
1935
endif()
2036

2137
include(${xgboost_SOURCE_DIR}/cmake/FindPrefetchIntrinsics.cmake)

include/xgboost/c_api.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,7 +1221,7 @@ XGB_DLL int XGBoosterPredictFromCudaColumnar(BoosterHandle handle, char const *v
12211221
* \brief Load model from existing file
12221222
*
12231223
* \param handle handle
1224-
* \param fname File URI or file name.
1224+
* \param fname File URI or file name. The string must be UTF-8 encoded.
12251225
* \return 0 when success, -1 when failure happens
12261226
*/
12271227
XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
@@ -1230,7 +1230,7 @@ XGB_DLL int XGBoosterLoadModel(BoosterHandle handle,
12301230
* \brief Save model into existing file
12311231
*
12321232
* \param handle handle
1233-
* \param fname File URI or file name.
1233+
* \param fname File URI or file name. The string must be UTF-8 encoded.
12341234
* \return 0 when success, -1 when failure happens
12351235
*/
12361236
XGB_DLL int XGBoosterSaveModel(BoosterHandle handle,

src/common/io.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <cstddef> // for size_t
2929
#include <cstdint> // for int32_t, uint32_t
3030
#include <cstring> // for memcpy
31+
#include <filesystem> // for filesystem
3132
#include <fstream> // for ifstream
3233
#include <iterator> // for distance
3334
#include <limits> // for numeric_limits
@@ -153,7 +154,7 @@ std::string LoadSequentialFile(std::string uri, bool stream) {
153154
// Open in binary mode so that correct file size can be computed with
154155
// seekg(). This accommodates Windows platform:
155156
// https://docs.microsoft.com/en-us/cpp/standard-library/basic-istream-class?view=vs-2019#seekg
156-
std::ifstream ifs(uri, std::ios_base::binary | std::ios_base::in);
157+
std::ifstream ifs(std::filesystem::u8path(uri), std::ios_base::binary | std::ios_base::in);
157158
if (!ifs) {
158159
// https://stackoverflow.com/a/17338934
159160
OpenErr();

tests/ci_build/build_python_wheels.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ if [[ "$platform_id" == macosx_* ]]; then
3535
# MacOS, Intel
3636
wheel_tag=macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64
3737
cpython_ver=38
38-
export MACOSX_DEPLOYMENT_TARGET=10.13
38+
export MACOSX_DEPLOYMENT_TARGET=10.15
3939
#OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/11.1.0/download/osx-64/llvm-openmp-11.1.0-hda6cdc1_1.tar.bz2"
4040
OPENMP_URL="https://xgboost-ci-jenkins-artifacts.s3.us-west-2.amazonaws.com/llvm-openmp-11.1.0-hda6cdc1_1-osx-64.tar.bz2"
4141
else

tests/cpp/c_api/test_c_api.cc

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
#include <xgboost/learner.h>
99
#include <xgboost/version_config.h>
1010

11-
#include <array> // for array
12-
#include <cstddef> // std::size_t
13-
#include <limits> // std::numeric_limits
14-
#include <string> // std::string
11+
#include <array> // for array
12+
#include <cstddef> // std::size_t
13+
#include <filesystem> // std::filesystem
14+
#include <limits> // std::numeric_limits
15+
#include <string> // std::string
1516
#include <vector>
1617

1718
#include "../../../src/c_api/c_api_error.h"
@@ -162,7 +163,7 @@ TEST(CAPI, ConfigIO) {
162163
TEST(CAPI, JsonModelIO) {
163164
size_t constexpr kRows = 10;
164165
size_t constexpr kCols = 10;
165-
dmlc::TemporaryDirectory tempdir;
166+
auto tempdir = std::filesystem::temp_directory_path();
166167

167168
auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
168169
std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
@@ -178,19 +179,19 @@ TEST(CAPI, JsonModelIO) {
178179
learner->UpdateOneIter(0, p_dmat);
179180
BoosterHandle handle = learner.get();
180181

181-
std::string modelfile_0 = tempdir.path + "/model_0.json";
182-
XGBoosterSaveModel(handle, modelfile_0.c_str());
183-
XGBoosterLoadModel(handle, modelfile_0.c_str());
182+
auto modelfile_0 = tempdir / std::filesystem::u8path(u8"모델_0.json");
183+
XGBoosterSaveModel(handle, modelfile_0.u8string().c_str());
184+
XGBoosterLoadModel(handle, modelfile_0.u8string().c_str());
184185

185186
bst_ulong num_feature {0};
186187
ASSERT_EQ(XGBoosterGetNumFeature(handle, &num_feature), 0);
187188
ASSERT_EQ(num_feature, kCols);
188189

189-
std::string modelfile_1 = tempdir.path + "/model_1.json";
190-
XGBoosterSaveModel(handle, modelfile_1.c_str());
190+
auto modelfile_1 = tempdir / "model_1.json";
191+
XGBoosterSaveModel(handle, modelfile_1.u8string().c_str());
191192

192-
auto model_str_0 = common::LoadSequentialFile(modelfile_0);
193-
auto model_str_1 = common::LoadSequentialFile(modelfile_1);
193+
auto model_str_0 = common::LoadSequentialFile(modelfile_0.u8string());
194+
auto model_str_1 = common::LoadSequentialFile(modelfile_1.u8string());
194195

195196
ASSERT_EQ(model_str_0.front(), '{');
196197
ASSERT_EQ(model_str_0, model_str_1);

tests/python/test_basic.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import pathlib
34
import tempfile
45
from pathlib import Path
56

@@ -167,6 +168,17 @@ def test_load_file_invalid(self):
167168
with pytest.raises(xgb.core.XGBoostError):
168169
xgb.Booster(model_file=u'不正なパス')
169170

171+
@pytest.mark.parametrize("path", ["모델.ubj", "がうる・ぐら.json"], ids=["path-0", "path-1"])
172+
def test_unicode_path(self, tmpdir, path):
173+
model_path = pathlib.Path(tmpdir) / path
174+
dtrain, _ = tm.load_agaricus(__file__)
175+
param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
176+
bst = xgb.train(param, dtrain, num_boost_round=2)
177+
bst.save_model(model_path)
178+
179+
bst2 = xgb.Booster(model_file=model_path)
180+
assert bst.get_dump(dump_format="text") == bst2.get_dump(dump_format="text")
181+
170182
def test_dmatrix_numpy_init_omp(self):
171183

172184
rows = [1000, 11326, 15000]

0 commit comments

Comments
 (0)