diff --git a/Dockerfile b/Dockerfile index e6e6bfe..6fc0a95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update \ lbzip2 \ gosu \ python3.12 \ + zstd \ curl \ && rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index ca54344..f939b43 100644 --- a/README.md +++ b/README.md @@ -58,22 +58,23 @@ docker compose up -d The container can be configured using the following environment variables: -| Variable | Parameters | Default | Description | -| ---------------------- | -------------------------------------- | -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `UPDATE_STRATEGY` | `PARALLEL`, `SEQUENTIAL`, `DISABLED` | `SEQUENTIAL` | Controls how index updates are handled. `PARALLEL` downloads the new index in the background then swaps with minimal downtime (requires 2x space). `SEQUENTIAL` stops Photon, deletes the existing index, downloads the new one, then restarts. `DISABLED` prevents automatic updates. | -| `UPDATE_INTERVAL` | Time string (e.g., "720h", "30d") | `30d` | How often to check for updates. To reduce server load, it is recommended to set this to a long interval (e.g., `720h` for 30 days) or disable updates altogether if you do not need the latest data. | -| `REGION` | Region name, country code, or `planet` | `planet` | Optional region for a specific dataset. Can be a continent (`europe`, `asia`), individual country/region (`germany`, `usa`, `japan`), country code (`de`, `us`, `jp`), or `planet` for worldwide data. See [Available Regions](#available-regions) section for details. | -| `LOG_LEVEL` | `DEBUG`, `INFO`, `ERROR` | `INFO` | Controls logging verbosity. 
| -| `FORCE_UPDATE` | `TRUE`, `FALSE` | `FALSE` | Forces an index update on container startup, regardless of `UPDATE_STRATEGY`. | -| `DOWNLOAD_MAX_RETRIES` | Number | `3` | Maximum number of retries for failed downloads. | -| `INITIAL_DOWNLOAD` | `TRUE`, `FALSE` | `TRUE` | Controls whether the container performs the initial index download when the Photon data directory is empty. Useful for manual imports. | -| `BASE_URL` | Valid URL | `https://r2.koalasec.org/public` | Custom base URL for index data downloads. Should point to the parent directory of index files. The default has been changed to a community mirror to reduce load on the GraphHopper servers. | -| `SKIP_MD5_CHECK` | `TRUE`, `FALSE` | `FALSE` | Optionally skip MD5 verification of downloaded index files. | -| `FILE_URL` | URL to a .tar.bz2 file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). This must be a tar.bz2 format. Make sure to set the `UPDATE_STRATEGY` to `DISABLED` when using this option. | -| `PHOTON_PARAMS` | Photon executable parameters | - | See `https://github.com/komoot/photon#running-photon.` | -| `APPRISE_URLS` | Comma-separated Apprise URLs | - | Optional notification URLs for [Apprise](https://github.com/caronc/apprise) to send status updates (e.g., download completion, errors). Supports multiple services like Pushover, Slack, email, etc. Example: `pover://user@token,mailto://user:pass@gmail.com` | -| `PUID` | User ID | 9011 | The User ID for the photon process. Set this to your host user's ID (`id -u`) to prevent permission errors when using bind mounts. | -| `PGID` | Group ID | 9011 | The Group ID for the photon process. Set this to your host group's ID (`id -g`) to prevent permission errors when using bind mounts. 
| +| Variable | Parameters | Default | Description | +|------------------------|-------------------------------------------------------| -------------------------------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `UPDATE_STRATEGY` | `PARALLEL`, `SEQUENTIAL`, `DISABLED` | `SEQUENTIAL` | Controls how index updates are handled. `PARALLEL` downloads the new index in the background then swaps with minimal downtime (requires 2x space). `SEQUENTIAL` stops Photon, deletes the existing index, downloads the new one, then restarts. `DISABLED` prevents automatic updates. | +| `UPDATE_INTERVAL` | Time string (e.g., "720h", "30d") | `30d` | How often to check for updates. To reduce server load, it is recommended to set this to a long interval (e.g., `720h` for 30 days) or disable updates altogether if you do not need the latest data. | +| `REGION` | Region name, country code, or `planet` | `planet` | Optional region for a specific dataset. Can be a continent (`europe`, `asia`), individual country/region (`germany`, `usa`, `japan`), country code (`de`, `us`, `jp`), or `planet` for worldwide data. See [Available Regions](#available-regions) section for details. | +| `LOG_LEVEL` | `DEBUG`, `INFO`, `ERROR` | `INFO` | Controls logging verbosity. | +| `FORCE_UPDATE` | `TRUE`, `FALSE` | `FALSE` | Forces an index update on container startup, regardless of `UPDATE_STRATEGY`. | +| `DOWNLOAD_MAX_RETRIES` | Number | `3` | Maximum number of retries for failed downloads. | +| `INITIAL_DOWNLOAD` | `TRUE`, `FALSE` | `TRUE` | Controls whether the container performs the initial index download when the Photon data directory is empty. Useful for manual imports. 
| +| `BASE_URL` | Valid URL | `https://r2.koalasec.org/public` | Custom base URL for index data downloads. Should point to the parent directory of index files. The default has been changed to a community mirror to reduce load on the GraphHopper servers. | +| `SKIP_MD5_CHECK` | `TRUE`, `FALSE` | `FALSE` | Optionally skip MD5 verification of downloaded index files. | +| `FILE_URL` | URL to a .tar.bz2/.jsonl.zst file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). The file must be in tar.bz2 or jsonl.zst format. Make sure to set the `UPDATE_STRATEGY` to `DISABLED` when using this option. | +| `PHOTON_PARAMS` | Photon executable parameters | - | See `https://github.com/komoot/photon#running-photon`. | +| `BUILD_PHOTON_PARAMS` | Photon executable parameters for jsonl.zst extraction | `-languages en,de,fr,es,it` | See `https://github.com/komoot/photon#importing-data-from-a-json-dump`. | +| `APPRISE_URLS` | Comma-separated Apprise URLs | - | Optional notification URLs for [Apprise](https://github.com/caronc/apprise) to send status updates (e.g., download completion, errors). Supports multiple services like Pushover, Slack, email, etc. Example: `pover://user@token,mailto://user:pass@gmail.com` | +| `PUID` | User ID | 9011 | The User ID for the photon process. Set this to your host user's ID (`id -u`) to prevent permission errors when using bind mounts. | +| `PGID` | Group ID | 9011 | The Group ID for the photon process. Set this to your host group's ID (`id -g`) to prevent permission errors when using bind mounts. 
| ## Available Regions diff --git a/src/downloader.py b/src/downloader.py index bc1485e..a3bb774 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -281,8 +281,11 @@ def sequential_update(): def download_index() -> str: - output_file = "photon-db-latest.tar.bz2" download_url = get_download_url() + if download_url.endswith(".jsonl.zst"): + output_file = "photon-data-dump.jsonl.zst" + else: + output_file = "photon-db-latest.tar.bz2" output = os.path.join(config.TEMP_DIR, output_file) @@ -296,29 +299,13 @@ def download_index() -> str: def download_md5(): - if config.REGION: - normalized = normalize_region(config.REGION) - region_info = get_region_info(config.REGION) - if not region_info: - raise ValueError(f"Unknown region: {config.REGION}") - - region_type = region_info["type"] - - if region_type == "planet": - md5_url = "/photon-db-planet-0.7OS-latest.tar.bz2.md5" - elif region_type == "continent": - md5_url = f"/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2.md5" - elif region_type == "sub-region": - continent = region_info["continent"] - md5_url = f"/{continent}/{normalized}/photon-db-{normalized}-0.7OS-latest.tar.bz2.md5" - else: - raise ValueError(f"Invalid region type: {region_type}") + download_url = get_download_url() + if download_url.endswith(".jsonl.zst"): + output_file = "photon-data-dump.jsonl.zst.md5" else: - md5_url = "/photon-db-planet-0.7OS-latest.tar.bz2.md5" - - download_url = config.BASE_URL + md5_url + output_file = "photon-db-latest.tar.bz2.md5" + download_url = f"{download_url}.md5" - output_file = "photon-db-latest.tar.bz2.md5" output = os.path.join(config.TEMP_DIR, output_file) if not download_file(download_url, output): diff --git a/src/filesystem.py b/src/filesystem.py index ed83bd3..4e635cd 100644 --- a/src/filesystem.py +++ b/src/filesystem.py @@ -21,8 +21,11 @@ def extract_index(index_file: str): if not os.path.exists(config.TEMP_DIR): logging.debug(f"Creating temp directory: {config.TEMP_DIR}") 
os.makedirs(config.TEMP_DIR, exist_ok=True) + if index_file.endswith(".jsonl.zst"): + install_command = f"zstd --stdout -d {index_file} | java -jar /photon/photon.jar -nominatim-import -import-file - -data-dir {config.TEMP_DIR} {config.BUILD_PHOTON_PARAMS}" + else: + install_command = f"lbzip2 -d -c {index_file} | tar x -C {config.TEMP_DIR}" - install_command = f"lbzip2 -d -c {index_file} | tar x -C {config.TEMP_DIR}" logging.debug(f"Extraction command: {install_command}") try: diff --git a/src/utils/config.py b/src/utils/config.py index 1cc0d28..72f0029 100644 --- a/src/utils/config.py +++ b/src/utils/config.py @@ -8,6 +8,7 @@ DOWNLOAD_MAX_RETRIES = os.getenv("DOWNLOAD_MAX_RETRIES", "3") FILE_URL = os.getenv("FILE_URL") PHOTON_PARAMS = os.getenv("PHOTON_PARAMS") +BUILD_PHOTON_PARAMS = os.getenv("BUILD_PHOTON_PARAMS", "-languages en,de,fr,es,it") JAVA_PARAMS = os.getenv("JAVA_PARAMS") LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") BASE_URL = os.getenv("BASE_URL", "https://r2.koalasec.org/public").rstrip("/")