Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CI-wheels.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
- main

env:
LIBZIM_DL_VERSION: "9.3.0-1"
LIBZIM_DL_VERSION: "9.4.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION"
CIBW_BUILD_VERBOSITY: "3"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/Publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on:
- published

env:
LIBZIM_DL_VERSION: "9.3.0-1"
LIBZIM_DL_VERSION: "9.4.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION"
# APPLE_SIGNING_KEYCHAIN_PATH set in prepare keychain step
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/QA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: QA
on: [push]

env:
LIBZIM_DL_VERSION: "9.3.0-1"
LIBZIM_DL_VERSION: "9.4.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/Tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Tests
on: [push]

env:
LIBZIM_DL_VERSION: "9.3.0-1"
LIBZIM_DL_VERSION: "9.4.0-1"
MACOSX_DEPLOYMENT_TARGET: "13.0"
# we want cython traces for coverage
PROFILE: "1"
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Upgrade Github CI Actions
- Run tests on minimum supported platforms + more recent stable ones
- Fix various Cython warning and deprecation notices (#239)
- libzim 9.4.0 Cache Control API
- remove `Archive.dirent_lookup_cache_max_size`, which no longer exists in libzim
- move `Archive.cluster_cache_max_size` and `Archive.cluster_cache_current_size` to module-level functions outside of the `Archive` object: `get_cluster_cache_max_size`, `set_cluster_cache_max_size`, `get_cluster_cache_current_size` (sizes are now in bytes)
- deprecate usage of `get_illustration_sizes()`


## [3.7.0] - 2025-04-18

Expand Down
5 changes: 0 additions & 5 deletions libzim/libwrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,9 @@ class Archive : public Wrapper<zim::Archive>
FORWARD(bool, hasTitleIndex)
FORWARD(bool, hasChecksum)
FORWARD(bool, check)
FORWARD(zim::size_type, getClusterCacheMaxSize)
FORWARD(zim::size_type, getClusterCacheCurrentSize)
FORWARD(void, setClusterCacheMaxSize)
FORWARD(zim::size_type, getDirentCacheMaxSize)
FORWARD(zim::size_type, getDirentCacheCurrentSize)
FORWARD(void, setDirentCacheMaxSize)
FORWARD(zim::size_type, getDirentLookupCacheMaxSize)
FORWARD(void, setDirentLookupCacheMaxSize)
};

class SearchResultSet : public Wrapper<zim::SearchResultSet>
Expand Down
94 changes: 39 additions & 55 deletions libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import os
import pathlib
import sys
import traceback
import warnings
from collections import OrderedDict
from types import ModuleType
from typing import Dict, Generator, Iterator, List, Optional, Set, TextIO, Tuple, Union
Expand Down Expand Up @@ -1304,9 +1305,18 @@ cdef class Archive:
def get_illustration_sizes(self) -> Set[pyint]:
"""Sizes for which an illustration is available (@1 scale only).

.. deprecated:: 3.8.0
Use :meth:`get_illustration_infos` instead for full illustration metadata
including width, height, and scale information.

Returns:
The set of available sizes of the illustration.
"""
warnings.warn(
"get_illustration_sizes() is deprecated, use get_illustration_infos() instead",
DeprecationWarning,
stacklevel=2
)
return self.c_archive.getIllustrationSizes()

def has_illustration(self, size: pyint = None) -> pybool:
Expand All @@ -1332,37 +1342,6 @@ cdef class Archive:
except RuntimeError as e:
raise KeyError(str(e))

@property
def cluster_cache_max_size(self) -> pyint:
"""Maximum size of the cluster cache.

Returns:
(int): maximum number of clusters stored in the cache.
"""
return self.c_archive.getClusterCacheMaxSize()

@cluster_cache_max_size.setter
def cluster_cache_max_size(self, nb_clusters: pyint):
"""Set the size of the cluster cache.

If the new size is lower than the number of currently stored clusters
some clusters will be dropped from cache to respect the new size.

Args:
nb_clusters (int): maximum number of clusters stored in the cache
"""

self.c_archive.setClusterCacheMaxSize(nb_clusters)

@property
def cluster_cache_current_size(self) -> pyint:
"""Size of the cluster cache.

Returns:
(int): number of clusters currently stored in the cache.
"""
return self.c_archive.getClusterCacheCurrentSize()

@property
def dirent_cache_max_size(self) -> pyint:
"""Maximum size of the dirent cache.
Expand Down Expand Up @@ -1393,36 +1372,38 @@ cdef class Archive:
"""
return self.c_archive.getDirentCacheCurrentSize()

@property
def dirent_lookup_cache_max_size(self) -> pyint:
"""Size of the dirent lookup cache.
def __repr__(self) -> str:
return f"{self.__class__.__name__}(filename={self.filename})"

The returned size returns the default size or the last set size.
This may not correspond to the actual size of the dirent lookup cache.
See set_dirent_lookup_cache_max_size for more information.

Returns:
(int): maximum number of sub ranges created in the lookup cache.
"""
return self.c_archive.getDirentLookupCacheMaxSize()
def get_cluster_cache_max_size() -> pyint:
"""Get the maximum size of the cluster cache.

@dirent_lookup_cache_max_size.setter
def dirent_lookup_cache_max_size(self, nb_ranges: pyint):
"""Set the size of the dirent lookup cache.
Returns:
(int): the maximum memory size used by the cluster cache (in bytes).
"""
return zim.getClusterCacheMaxSize()

Contrary to other set_<foo>_cache_max_size, this method is useless
once the lookup cache is created.
The lookup cache is created at first access to a entry in the archive.
So this method must be called before any access to content (including metadata).
It is best to call this method first, just after the archive creation.
def set_cluster_cache_max_size(size_in_bytes: pyint):
"""Set the size of the cluster cache.

Args:
nb_ranges (int): maximum number of sub ranges created in the lookup cache.
"""
self.c_archive.setDirentLookupCacheMaxSize(nb_ranges)
If the new size is lower than the memory currently used by the cached clusters,
some clusters will be dropped from the cache to respect the new size.

Args:
size_in_bytes (int): the memory limit (in bytes) for the cluster cache.
"""

zim.setClusterCacheMaxSize(size_in_bytes)

def get_cluster_cache_current_size() -> pyint:
"""Get the current size of the cluster cache.

Returns:
(int): the current memory size (in bytes) used by the cluster cache.
"""
return zim.getClusterCacheCurrentSize()

def __repr__(self) -> str:
return f"{self.__class__.__name__}(filename={self.filename})"

reader_module_doc = """libzim reader module

Expand All @@ -1442,6 +1423,9 @@ reader_public_objects = [
Archive,
Entry,
Item,
get_cluster_cache_max_size,
set_cluster_cache_max_size,
get_cluster_cache_current_size,
]
reader = create_module(reader_module_name, reader_module_doc, reader_public_objects)

Expand Down
14 changes: 4 additions & 10 deletions libzim/reader.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,13 @@ class Archive:
def has_illustration(self, size: int | None = None) -> bool: ...
def get_illustration_item(self, size: int | None = None) -> Item: ...
@property
def cluster_cache_max_size(self) -> int: ...
@cluster_cache_max_size.setter
def cluster_cache_max_size(self, nb_clusters: int): ...
@property
def cluster_cache_current_size(self) -> int: ...
@property
def dirent_cache_max_size(self) -> int: ...
@dirent_cache_max_size.setter
def dirent_cache_max_size(self, nb_dirents: int): ...
@property
def dirent_cache_current_size(self) -> int: ...
@property
def dirent_lookup_cache_max_size(self) -> int: ...
@dirent_lookup_cache_max_size.setter
def dirent_lookup_cache_max_size(self, nb_ranges: int): ...
def __repr__(self) -> str: ...

# Module-level cluster cache controls: they take no Archive argument.
# All sizes are expressed in bytes.
def get_cluster_cache_max_size() -> int: ...

# The parameter name mirrors the implementation in libzim.pyx so that keyword
# calls (`set_cluster_cache_max_size(size_in_bytes=...)`) type-check correctly.
def set_cluster_cache_max_size(size_in_bytes: int) -> None: ...
def get_cluster_cache_current_size() -> int: ...
10 changes: 5 additions & 5 deletions libzim/zim.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,9 @@ cdef extern from "libwrapper.h" namespace "wrapper":
bool hasChecksum() except +
bool check() except +

uint64_t getClusterCacheMaxSize() except +
uint64_t getClusterCacheCurrentSize() except +
void setClusterCacheMaxSize(uint64_t nbClusters) except +
uint64_t getDirentCacheMaxSize() except +
uint64_t getDirentCacheCurrentSize() except +
void setDirentCacheMaxSize(uint64_t nbDirents) except +
uint64_t getDirentLookupCacheMaxSize() except +
void setDirentLookupCacheMaxSize(uint64_t nbRanges) except +

cdef cppclass Searcher:
Searcher()
Expand Down Expand Up @@ -233,3 +228,8 @@ cdef extern from "libwrapper.h" namespace "wrapper":

cdef extern from "zim/version.h" namespace "zim":
cdef vector[pair[string, string]] getVersions()

cdef extern from "zim/archive.h" namespace "zim":
cdef uint64_t getClusterCacheMaxSize() except +
cdef uint64_t getClusterCacheCurrentSize() except +
cdef void setClusterCacheMaxSize(uint64_t sizeInB) except +
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@


class Config:
libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.3.0-1")
libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.4.0-1")
use_system_libzim: bool = bool(os.getenv("USE_SYSTEM_LIBZIM") or False)
download_libzim: bool = not bool(os.getenv("DONT_DOWNLOAD_LIBZIM") or False)

Expand Down
3 changes: 2 additions & 1 deletion tests/test_libzim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,8 @@ def test_creator_illustration(fpath, favicon_data):
assert zim.has_illustration(128) is False
assert bytes(zim.get_illustration_item().content) == favicon_data
assert bytes(zim.get_illustration_item(96).content) == favicon_data
assert zim.get_illustration_sizes() == {48, 96}
with pytest.warns(DeprecationWarning, match="get_illustration_sizes.*deprecated"):
assert zim.get_illustration_sizes() == {48, 96}


def test_creator_additem(fpath, lipsum_item):
Expand Down
58 changes: 20 additions & 38 deletions tests/test_libzim_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
import pytest

import libzim.writer # pyright: ignore [reportMissingModuleSource]
from libzim.reader import Archive, Entry # pyright: ignore [reportMissingModuleSource]
from libzim.reader import ( # pyright: ignore [reportMissingModuleSource]
Archive,
Entry,
get_cluster_cache_current_size,
get_cluster_cache_max_size,
set_cluster_cache_max_size,
)
from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource]
from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource]
SuggestionSearcher,
Expand Down Expand Up @@ -621,28 +627,25 @@ def test_reader_get_random_entry(all_zims):
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_cluster_cache(all_zims, filename):
zim = Archive(all_zims / filename)
default_value = 16
new_value = 1
empty_value = 0
default_value = 536870912 # 512M
new_value = 1024

assert zim.cluster_cache_max_size == default_value
assert get_cluster_cache_max_size() == default_value

zim.cluster_cache_max_size = new_value
assert zim.cluster_cache_max_size == new_value
# modify cluster cache max size
set_cluster_cache_max_size(new_value)
assert get_cluster_cache_max_size() == new_value

# test index access
for index in range(0, zim.entry_count - 1):
bytes(zim._get_entry_by_id(index).get_item().content)

assert zim.cluster_cache_current_size <= new_value

zim.cluster_cache_max_size = empty_value
assert zim.cluster_cache_max_size == empty_value

for index in range(0, zim.entry_count - 1):
bytes(zim._get_entry_by_id(index).get_item().content)
# check current size is not too big (not really relevant since cache keeps at least
# one cluster in memory, so this value depends on maximum cluster size)
assert get_cluster_cache_current_size() <= new_value

assert zim.cluster_cache_current_size == empty_value
# restore default value for next tests
set_cluster_cache_max_size(default_value)


@skip_if_offline
Expand Down Expand Up @@ -671,26 +674,5 @@ def test_dirent_cache(all_zims, filename):
for index in range(0, zim.entry_count - 1):
bytes(zim._get_entry_by_id(index).get_item().content)

assert zim.dirent_cache_current_size == empty_value


@skip_if_offline
@pytest.mark.parametrize(*parametrize_for(["filename"]))
def test_dirent_lookup_cache(all_zims, filename):
zim = Archive(all_zims / filename)
default_value = 1024
new_value = 2
empty_value = 0

assert zim.dirent_lookup_cache_max_size == default_value

zim.dirent_lookup_cache_max_size = new_value
assert zim.dirent_lookup_cache_max_size == new_value

# test index access
for index in range(0, zim.entry_count - 1):
bytes(zim._get_entry_by_id(index).get_item().content)

# setting after reading records the value but it has no use
zim.dirent_lookup_cache_max_size = empty_value
assert zim.dirent_lookup_cache_max_size == empty_value
# always at least one entry is kept in cache unless ZIM is empty
assert zim.dirent_cache_current_size == (1 if zim.entry_count else 0)
Loading