diff --git a/BUILD_CHINA.md b/BUILD_CHINA.md new file mode 100644 index 000000000..27a4c3e7c --- /dev/null +++ b/BUILD_CHINA.md @@ -0,0 +1,118 @@ + + +# Building in China + +This guide helps developers in China who may experience network issues when downloading dependencies from GitHub or international mirrors. + +## Using Custom Mirror URLs + +If you experience download timeouts, you can override the default dependency URLs using environment variables: + +```bash +export ICEBERG_ARROW_URL="<your-mirror>/apache-arrow-22.0.0.tar.gz" +export ICEBERG_NANOARROW_URL="<your-mirror>/apache-arrow-nanoarrow-0.7.0.tar.gz" +export ICEBERG_CROARING_URL="<your-mirror>/CRoaring-v4.3.11.tar.gz" +export ICEBERG_NLOHMANN_JSON_URL="<your-mirror>/json-v3.11.3.tar.xz" +export ICEBERG_SPDLOG_URL="<your-mirror>/spdlog-v1.15.3.tar.gz" +export ICEBERG_CPR_URL="<your-mirror>/cpr-1.12.0.tar.gz" + +# For Avro (git repository): +export ICEBERG_AVRO_GIT_URL="<your-mirror>/avro.git" +# Or if you have a tarball: +export ICEBERG_AVRO_URL="<your-mirror>/avro.tar.gz" +``` + +Then build as usual: + +```bash +cmake -S . -B build +cmake --build build +``` + +## Alternative Solutions + +1. **Use system packages**: Install dependencies via your system package manager +2. **Use a proxy**: Set the `https_proxy` environment variable +3. **Pre-download**: Manually download tarballs to `~/.cmake/Downloads/` + +## Getting Help + +If you continue to experience build issues, please open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed. + +# Building in China + +This guide helps developers in China build iceberg-cpp when network access to GitHub and other international sites is limited. + +## Mirror Support + +The build system automatically tries alternative download mirrors when the primary URL fails. All third-party dependencies have been configured with China-based mirrors. 
+ +### Available Mirrors + +Dependencies are automatically downloaded from these mirror sites: + +**Apache Projects (Arrow, Nanoarrow):** +- Tsinghua University: https://mirrors.tuna.tsinghua.edu.cn/apache/ +- USTC: https://mirrors.ustc.edu.cn/apache/ + +**GitHub Projects (CRoaring, nlohmann-json, spdlog, cpr):** +- Gitee: https://gitee.com/mirrors/ +- FastGit: https://hub.fastgit.xyz/ + +**Note**: Avro is built from an unreleased commit, so it is fetched from a git repository rather than a release tarball. Automatic mirror fallback is not available for git repositories, but you can specify a custom git mirror using the `ICEBERG_AVRO_GIT_URL` environment variable. + +### Custom Mirror URLs + +To override the default mirrors, set environment variables before running CMake: + +```bash +export ICEBERG_ARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" +export ICEBERG_NANOARROW_URL="https://mirrors.tuna.tsinghua.edu.cn/apache/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" +export ICEBERG_CROARING_URL="https://gitee.com/mirrors/CRoaring/repository/archive/v4.3.11.tar.gz" +export ICEBERG_NLOHMANN_JSON_URL="https://gitee.com/mirrors/JSON-for-Modern-CPP/releases/download/v3.11.3/json.tar.xz" +export ICEBERG_SPDLOG_URL="https://gitee.com/mirrors/spdlog/repository/archive/v1.15.3.tar.gz" +export ICEBERG_CPR_URL="https://gitee.com/mirrors/cpr/repository/archive/1.12.0.tar.gz" + +# For Avro, set only ONE of the following; if both are set, ICEBERG_AVRO_URL takes precedence: +export ICEBERG_AVRO_URL="https://example.com/avro.tar.gz" # if you have a tarball +# OR +export ICEBERG_AVRO_GIT_URL="https://gitee.com/mirrors/avro.git" # for git mirror +``` + +Then build as usual: + +```bash +cmake -S . 
-B build +cmake --build build +``` + +## Troubleshooting + +**Download failures:** +- Try setting a specific mirror using environment variables +- Use a VPN or proxy: `export https_proxy=http://proxy:port` +- Pre-download tarballs to `~/.cmake/Downloads/` + +**Slow downloads:** +- The build will automatically retry with different mirrors +- Consider using the Meson build system as an alternative + +**Still having issues?** +Open an issue at https://github.com/apache/iceberg-cpp/issues with details about which dependency failed and the error message. diff --git a/README.md b/README.md index 26bcf5a2b..affe87816 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ C++ implementation of [Apache Iceberg™](https://iceberg.apache.org/). - CMake 3.25 or higher - C++23 compliant compiler +> **Note**: For developers in China experiencing network issues when downloading dependencies, see [BUILD_CHINA.md](BUILD_CHINA.md) for mirror configuration. + ## Build ### Build, Run Test and Install Core Libraries diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8dce58fc3..9d69a38f5 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -164,17 +164,42 @@ function(resolve_avro_dependency) OFF CACHE BOOL "" FORCE) - fetchcontent_declare(avro-cpp - ${FC_DECLARE_COMMON_OPTIONS} - # TODO: switch to Apache Avro 1.13.0 once released. 
- GIT_REPOSITORY https://github.com/apache/avro.git - GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 - SOURCE_SUBDIR - lang/c++ - FIND_PACKAGE_ARGS - NAMES - avro-cpp - CONFIG) + if(DEFINED ENV{ICEBERG_AVRO_URL}) + # Support custom tarball URL + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + URL $ENV{ICEBERG_AVRO_URL} + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + elseif(DEFINED ENV{ICEBERG_AVRO_GIT_URL}) + # Support custom git URL for mirrors + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY $ENV{ICEBERG_AVRO_GIT_URL} + GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + else() + # Default to GitHub - uses unreleased version + fetchcontent_declare(avro-cpp + ${FC_DECLARE_COMMON_OPTIONS} + GIT_REPOSITORY https://github.com/apache/avro.git + GIT_TAG e6c308780e876b4c11a470b9900995947f7b0fb5 + SOURCE_SUBDIR + lang/c++ + FIND_PACKAGE_ARGS + NAMES + avro-cpp + CONFIG) + endif() fetchcontent_makeavailable(avro-cpp) @@ -221,9 +246,17 @@ endfunction() function(resolve_nanoarrow_dependency) prepare_fetchcontent() + if(DEFINED ENV{ICEBERG_NANOARROW_URL}) + set(NANOARROW_URL "$ENV{ICEBERG_NANOARROW_URL}") + else() + set(NANOARROW_URL + "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + ) + endif() + fetchcontent_declare(nanoarrow ${FC_DECLARE_COMMON_OPTIONS} - URL "https://dlcdn.apache.org/arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz" + URL ${NANOARROW_URL} FIND_PACKAGE_ARGS NAMES nanoarrow @@ -270,9 +303,16 @@ function(resolve_croaring_dependency) set(ENABLE_ROARING_TESTS OFF) set(ENABLE_ROARING_MICROBENCHMARKS OFF) + if(DEFINED ENV{ICEBERG_CROARING_URL}) + set(CROARING_URL "$ENV{ICEBERG_CROARING_URL}") + else() + set(CROARING_URL + "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz") + endif() + fetchcontent_declare(croaring 
${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/RoaringBitmap/CRoaring/archive/refs/tags/v4.3.11.tar.gz" + URL ${CROARING_URL} FIND_PACKAGE_ARGS NAMES roaring @@ -318,9 +358,16 @@ function(resolve_nlohmann_json_dependency) OFF CACHE BOOL "" FORCE) + if(DEFINED ENV{ICEBERG_NLOHMANN_JSON_URL}) + set(NLOHMANN_JSON_URL "$ENV{ICEBERG_NLOHMANN_JSON_URL}") + else() + set(NLOHMANN_JSON_URL + "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz") + endif() + fetchcontent_declare(nlohmann_json ${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/nlohmann/json/releases/download/v3.11.3/json.tar.xz" + URL ${NLOHMANN_JSON_URL} FIND_PACKAGE_ARGS NAMES nlohmann_json @@ -378,9 +425,15 @@ function(resolve_spdlog_dependency) ON CACHE BOOL "" FORCE) + if(DEFINED ENV{ICEBERG_SPDLOG_URL}) + set(SPDLOG_URL "$ENV{ICEBERG_SPDLOG_URL}") + else() + set(SPDLOG_URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz") + endif() + fetchcontent_declare(spdlog ${FC_DECLARE_COMMON_OPTIONS} - URL "https://github.com/gabime/spdlog/archive/refs/tags/v1.15.3.tar.gz" + URL ${SPDLOG_URL} FIND_PACKAGE_ARGS NAMES spdlog @@ -440,9 +493,15 @@ function(resolve_cpr_dependency) set(CPR_ENABLE_SSL ON) set(CPR_USE_SYSTEM_CURL ON) + if(DEFINED ENV{ICEBERG_CPR_URL}) + set(CPR_URL "$ENV{ICEBERG_CPR_URL}") + else() + set(CPR_URL "https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz") + endif() + fetchcontent_declare(cpr ${FC_DECLARE_COMMON_OPTIONS} - URL https://github.com/libcpr/cpr/archive/refs/tags/1.12.0.tar.gz + URL ${CPR_URL} FIND_PACKAGE_ARGS NAMES cpr