diff --git a/.github/workflows/CI-wheels.yaml b/.github/workflows/CI-wheels.yaml index 79a8703..469fe0a 100644 --- a/.github/workflows/CI-wheels.yaml +++ b/.github/workflows/CI-wheels.yaml @@ -7,7 +7,7 @@ on: - main env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" CIBW_BUILD_VERBOSITY: "3" diff --git a/.github/workflows/Publish.yaml b/.github/workflows/Publish.yaml index a692ab2..6fe4eb0 100644 --- a/.github/workflows/Publish.yaml +++ b/.github/workflows/Publish.yaml @@ -6,7 +6,7 @@ on: - published env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" # APPLE_SIGNING_KEYCHAIN_PATH set in prepare keychain step diff --git a/.github/workflows/QA.yaml b/.github/workflows/QA.yaml index ea14c54..6381b96 100644 --- a/.github/workflows/QA.yaml +++ b/.github/workflows/QA.yaml @@ -2,7 +2,7 @@ name: QA on: [push] env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" jobs: diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index f1a84f2..9e1750d 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -2,7 +2,7 @@ name: Tests on: [push] env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0-1" MACOSX_DEPLOYMENT_TARGET: "13.0" # we want cython traces for coverage PROFILE: "1" diff --git a/CHANGELOG.md b/CHANGELOG.md index d0bfc2d..44f8d1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgrade Github CI Actions - Run tests on minimum supported platforms + more recent stable ones - Fix various Cython warning and deprecation notices (#239) +- libzim 9.4.0 Cache Control API + - remove `Archive.dirent_lookup_cache_max_size`, does not exist anymore in libzim + - move 
`Archive.cluster_cache_max_size` and `Archive.cluster_cache_current_size` to methods outside of `Archive` object: `get_cluster_cache_max_size`, `set_cluster_cache_max_size`, `get_cluster_cache_current_size` (size is now in bytes) + - deprecate usage of `get_illustration_sizes()` + ## [3.7.0] - 2025-04-18 diff --git a/libzim/libwrapper.h b/libzim/libwrapper.h index 7da1b47..c66eba6 100644 --- a/libzim/libwrapper.h +++ b/libzim/libwrapper.h @@ -172,14 +172,9 @@ class Archive : public Wrapper FORWARD(bool, hasTitleIndex) FORWARD(bool, hasChecksum) FORWARD(bool, check) - FORWARD(zim::size_type, getClusterCacheMaxSize) - FORWARD(zim::size_type, getClusterCacheCurrentSize) - FORWARD(void, setClusterCacheMaxSize) FORWARD(zim::size_type, getDirentCacheMaxSize) FORWARD(zim::size_type, getDirentCacheCurrentSize) FORWARD(void, setDirentCacheMaxSize) - FORWARD(zim::size_type, getDirentLookupCacheMaxSize) - FORWARD(void, setDirentLookupCacheMaxSize) }; class SearchResultSet : public Wrapper diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index e2ae38a..db63e4c 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -39,6 +39,7 @@ import os import pathlib import sys import traceback +import warnings from collections import OrderedDict from types import ModuleType from typing import Dict, Generator, Iterator, List, Optional, Set, TextIO, Tuple, Union @@ -1304,9 +1305,18 @@ cdef class Archive: def get_illustration_sizes(self) -> Set[pyint]: """Sizes for which an illustration is available (@1 scale only). + .. deprecated:: 3.8.0 + Use :meth:`get_illustration_infos` instead for full illustration metadata + including width, height, and scale information. + Returns: The set of available sizes of the illustration. 
""" + warnings.warn( + "get_illustration_sizes() is deprecated, use get_illustration_infos() instead", + DeprecationWarning, + stacklevel=2 + ) return self.c_archive.getIllustrationSizes() def has_illustration(self, size: pyint = None) -> pybool: @@ -1332,37 +1342,6 @@ cdef class Archive: except RuntimeError as e: raise KeyError(str(e)) - @property - def cluster_cache_max_size(self) -> pyint: - """Maximum size of the cluster cache. - - Returns: - (int): maximum number of clusters stored in the cache. - """ - return self.c_archive.getClusterCacheMaxSize() - - @cluster_cache_max_size.setter - def cluster_cache_max_size(self, nb_clusters: pyint): - """Set the size of the cluster cache. - - If the new size is lower than the number of currently stored clusters - some clusters will be dropped from cache to respect the new size. - - Args: - nb_clusters (int): maximum number of clusters stored in the cache - """ - - self.c_archive.setClusterCacheMaxSize(nb_clusters) - - @property - def cluster_cache_current_size(self) -> pyint: - """Size of the cluster cache. - - Returns: - (int): number of clusters currently stored in the cache. - """ - return self.c_archive.getClusterCacheCurrentSize() - @property def dirent_cache_max_size(self) -> pyint: """Maximum size of the dirent cache. @@ -1393,36 +1372,38 @@ cdef class Archive: """ return self.c_archive.getDirentCacheCurrentSize() - @property - def dirent_lookup_cache_max_size(self) -> pyint: - """Size of the dirent lookup cache. + def __repr__(self) -> str: + return f"{self.__class__.__name__}(filename={self.filename})" - The returned size returns the default size or the last set size. - This may not correspond to the actual size of the dirent lookup cache. - See set_dirent_lookup_cache_max_size for more information. - Returns: - (int): maximum number of sub ranges created in the lookup cache. 
- """ - return self.c_archive.getDirentLookupCacheMaxSize() +def get_cluster_cache_max_size() -> pyint: + """Get the maximum size of the cluster cache. - @dirent_lookup_cache_max_size.setter - def dirent_lookup_cache_max_size(self, nb_ranges: pyint): - """Set the size of the dirent lookup cache. + Returns: + (int): the maximum memory size used by the cluster cache (in bytes). + """ + return zim.getClusterCacheMaxSize() - Contrary to other set__cache_max_size, this method is useless - once the lookup cache is created. - The lookup cache is created at first access to a entry in the archive. - So this method must be called before any access to content (including metadata). - It is best to call this method first, just after the archive creation. +def set_cluster_cache_max_size(size_in_bytes: pyint): + """Set the size of the cluster cache. - Args: - nb_ranges (int): maximum number of sub ranges created in the lookup cache. - """ - self.c_archive.setDirentLookupCacheMaxSize(nb_ranges) + If the new size is lower than the memory currently used by the cache, + some clusters will be dropped from cache to respect the new size. + + Args: + size_in_bytes (int): the memory limit (in bytes) for the cluster cache. + """ + + zim.setClusterCacheMaxSize(size_in_bytes) + +def get_cluster_cache_current_size() -> pyint: + """Get the current size of the cluster cache. + + Returns: + (int): the current memory size (in bytes) used by the cluster cache. 
+ """ + return zim.getClusterCacheCurrentSize() - def __repr__(self) -> str: - return f"{self.__class__.__name__}(filename={self.filename})" reader_module_doc = """libzim reader module @@ -1442,6 +1423,9 @@ reader_public_objects = [ Archive, Entry, Item, + get_cluster_cache_max_size, + set_cluster_cache_max_size, + get_cluster_cache_current_size, ] reader = create_module(reader_module_name, reader_module_doc, reader_public_objects) diff --git a/libzim/reader.pyi b/libzim/reader.pyi index e0f1cb5..143e099 100644 --- a/libzim/reader.pyi +++ b/libzim/reader.pyi @@ -78,19 +78,13 @@ class Archive: def has_illustration(self, size: int | None = None) -> bool: ... def get_illustration_item(self, size: int | None = None) -> Item: ... @property - def cluster_cache_max_size(self) -> int: ... - @cluster_cache_max_size.setter - def cluster_cache_max_size(self, nb_clusters: int): ... - @property - def cluster_cache_current_size(self) -> int: ... - @property def dirent_cache_max_size(self) -> int: ... @dirent_cache_max_size.setter def dirent_cache_max_size(self, nb_dirents: int): ... @property def dirent_cache_current_size(self) -> int: ... - @property - def dirent_lookup_cache_max_size(self) -> int: ... - @dirent_lookup_cache_max_size.setter - def dirent_lookup_cache_max_size(self, nb_ranges: int): ... def __repr__(self) -> str: ... + +def get_cluster_cache_max_size() -> int: ... +def set_cluster_cache_max_size(nb_clusters: int): ... +def get_cluster_cache_current_size() -> int: ... 
diff --git a/libzim/zim.pxd b/libzim/zim.pxd index bbd25f4..691f0e9 100644 --- a/libzim/zim.pxd +++ b/libzim/zim.pxd @@ -178,14 +178,9 @@ cdef extern from "libwrapper.h" namespace "wrapper": bool hasChecksum() except + bool check() except + - uint64_t getClusterCacheMaxSize() except + - uint64_t getClusterCacheCurrentSize() except + - void setClusterCacheMaxSize(uint64_t nbClusters) except + uint64_t getDirentCacheMaxSize() except + uint64_t getDirentCacheCurrentSize() except + void setDirentCacheMaxSize(uint64_t nbDirents) except + - uint64_t getDirentLookupCacheMaxSize() except + - void setDirentLookupCacheMaxSize(uint64_t nbRanges) except + cdef cppclass Searcher: Searcher() @@ -233,3 +228,8 @@ cdef extern from "libwrapper.h" namespace "wrapper": cdef extern from "zim/version.h" namespace "zim": cdef vector[pair[string, string]] getVersions() + +cdef extern from "zim/archive.h" namespace "zim": + cdef uint64_t getClusterCacheMaxSize() except + + cdef uint64_t getClusterCacheCurrentSize() except + + cdef void setClusterCacheMaxSize(uint64_t sizeInB) except + \ No newline at end of file diff --git a/setup.py b/setup.py index 32f2ef3..0f7426b 100755 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ class Config: - libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.3.0-1") + libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.4.0-1") use_system_libzim: bool = bool(os.getenv("USE_SYSTEM_LIBZIM") or False) download_libzim: bool = not bool(os.getenv("DONT_DOWNLOAD_LIBZIM") or False) diff --git a/tests/test_libzim_creator.py b/tests/test_libzim_creator.py index 3bc627f..ef00932 100644 --- a/tests/test_libzim_creator.py +++ b/tests/test_libzim_creator.py @@ -369,7 +369,8 @@ def test_creator_illustration(fpath, favicon_data): assert zim.has_illustration(128) is False assert bytes(zim.get_illustration_item().content) == favicon_data assert bytes(zim.get_illustration_item(96).content) == favicon_data - assert zim.get_illustration_sizes() == {48, 96} + with 
pytest.warns(DeprecationWarning, match="get_illustration_sizes.*deprecated"): + assert zim.get_illustration_sizes() == {48, 96} def test_creator_additem(fpath, lipsum_item): diff --git a/tests/test_libzim_reader.py b/tests/test_libzim_reader.py index 2e0ca28..e8e0bd3 100644 --- a/tests/test_libzim_reader.py +++ b/tests/test_libzim_reader.py @@ -9,7 +9,13 @@ import pytest import libzim.writer # pyright: ignore [reportMissingModuleSource] -from libzim.reader import Archive, Entry # pyright: ignore [reportMissingModuleSource] +from libzim.reader import ( # pyright: ignore [reportMissingModuleSource] + Archive, + Entry, + get_cluster_cache_current_size, + get_cluster_cache_max_size, + set_cluster_cache_max_size, +) from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource] from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource] SuggestionSearcher, @@ -621,28 +627,25 @@ def test_reader_get_random_entry(all_zims): @pytest.mark.parametrize(*parametrize_for(["filename"])) def test_cluster_cache(all_zims, filename): zim = Archive(all_zims / filename) - default_value = 16 - new_value = 1 - empty_value = 0 + default_value = 536870912 # 512M + new_value = 1024 - assert zim.cluster_cache_max_size == default_value + assert get_cluster_cache_max_size() == default_value - zim.cluster_cache_max_size = new_value - assert zim.cluster_cache_max_size == new_value + # modify cluster cache max size + set_cluster_cache_max_size(new_value) + assert get_cluster_cache_max_size() == new_value # test index access for index in range(0, zim.entry_count - 1): bytes(zim._get_entry_by_id(index).get_item().content) - assert zim.cluster_cache_current_size <= new_value - - zim.cluster_cache_max_size = empty_value - assert zim.cluster_cache_max_size == empty_value - - for index in range(0, zim.entry_count - 1): - bytes(zim._get_entry_by_id(index).get_item().content) + # check current size is not too big (not really relevant since cache keeps at least + 
# one cluster in memory, so this value depends on maximum cluster size) + assert get_cluster_cache_current_size() <= new_value - assert zim.cluster_cache_current_size == empty_value + # restore default value for next tests + set_cluster_cache_max_size(default_value) @skip_if_offline @@ -671,26 +674,5 @@ def test_dirent_cache(all_zims, filename): for index in range(0, zim.entry_count - 1): bytes(zim._get_entry_by_id(index).get_item().content) - assert zim.dirent_cache_current_size == empty_value - - -@skip_if_offline -@pytest.mark.parametrize(*parametrize_for(["filename"])) -def test_dirent_lookup_cache(all_zims, filename): - zim = Archive(all_zims / filename) - default_value = 1024 - new_value = 2 - empty_value = 0 - - assert zim.dirent_lookup_cache_max_size == default_value - - zim.dirent_lookup_cache_max_size = new_value - assert zim.dirent_lookup_cache_max_size == new_value - - # test index access - for index in range(0, zim.entry_count - 1): - bytes(zim._get_entry_by_id(index).get_item().content) - - # setting after reading records the value but it has no use - zim.dirent_lookup_cache_max_size = empty_value - assert zim.dirent_lookup_cache_max_size == empty_value + # always at least one entry is kept in cache unless ZIM is empty + assert zim.dirent_cache_current_size == (1 if zim.entry_count else 0)