Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add support for free-threaded CPython (3.13 and 3.14)
- Upgrade GitHub CI Actions
- Run tests on minimum supported platforms + more recent stable ones
- Fix various Cython warnings and deprecation notices (#239)

## [3.7.0] - 2025-04-18

Expand Down
225 changes: 112 additions & 113 deletions libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,111 @@ class Hint(enum.Enum):
FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE


class ContentProvider:
    """ABC in charge of providing the content to add in the archive to the Creator.

    Subclasses must implement `get_size()` and either `gen_blob()` (the default
    `feed()` drains it one blob per call) or `feed()` itself.
    """
    __module__ = writer_module_name

    def __init__(self):
        # Created lazily from `gen_blob()` on the first call to `feed()`.
        self.generator = None

    def get_size(self) -> pyint:
        """Total size, in bytes, of the content delivered through `feed()`.

        Returns:
            int: The size of the data in bytes.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError("get_size must be implemented.")

    def feed(self) -> WritingBlob:
        """Blob(s) containing the complete content of the article.

        Each call returns the next blob produced by `gen_blob()`. Once the
        generator is exhausted, an empty blob is returned to tell the writer
        that no more content has to be written.
        Sum(size(blobs)) must be equal to `self.get_size()`.

        Returns:
            WritingBlob: The next content blob of the article, or an empty
                blob when there is nothing left.
        """
        if self.generator is None:
            self.generator = self.gen_blob()

        try:
            # Keep a reference to the blob on the instance so that it is not
            # garbage-collected while the C++ side is still using it.
            self._blob = next(self.generator)
        except StopIteration:
            self._blob = WritingBlob("")

        return self._blob

    def gen_blob(self) -> Generator[WritingBlob, None, None]:
        """Generator yielding blobs for the content of the article.

        Yields:
            WritingBlob: A blob containing part of the article content.

        Raises:
            NotImplementedError: Always; subclasses must override this method
                (or override `feed()` instead).
        """
        raise NotImplementedError("gen_blob (or feed) must be implemented")


class BaseWritingItem:
    """
    Data to be added to the archive.

    Stub meant to be subclassed: override the `get_*` methods and pass an
    instance of the subclass to `Creator.add_item()`.
    """
    __module__ = writer_module_name

    def __init__(self):
        self._blob = None
        # NOTE(review): this only binds a local name and has no effect on the
        # instance; it may have been intended as a class attribute -- confirm.
        get_indexdata = None

    def get_path(self) -> str:
        """Return the full path of the item.

        The path must be absolute and unique.

        Returns:
            Path of the item.
        """
        raise NotImplementedError("get_path must be implemented.")

    def get_title(self) -> str:
        """Return the item title, which might be indexed and used in suggestions.

        Returns:
            Title of the item.
        """
        raise NotImplementedError("get_title must be implemented.")

    def get_mimetype(self) -> str:
        """Return the MIME-type of the item's content.

        Returns:
            Mimetype of the item.
        """
        raise NotImplementedError("get_mimetype must be implemented.")

    def get_contentprovider(self) -> ContentProvider:
        """Return the ContentProvider holding the complete content of the item.

        Returns:
            The content provider of the item.
        """
        raise NotImplementedError("get_contentprovider must be implemented.")

    def get_hints(self) -> Dict[Hint, pyint]:
        """Return the Hints that help the Creator decide how to handle this item.

        Hints affect compression, presence in suggestion, random and search.

        Returns:
            Hints to help the Creator decide how to handle this item.
        """
        raise NotImplementedError("get_hints must be implemented.")

    def __repr__(self) -> str:
        # Evaluate the parts in the same order as they appear in the output.
        cls_name = self.__class__.__name__
        path = self.get_path()
        title = self.get_title()
        return f"{cls_name}(path={path}, title={title})"

cdef class _Creator:
"""ZIM Creator.
Expand Down Expand Up @@ -283,7 +388,7 @@ cdef class _Creator:
def __init__(self, filename: pathlib.Path):
pass

def config_verbose(self, bool verbose: bool) -> Creator:
def config_verbose(self, bool verbose: bool) -> _Creator:
"""Set creator verbosity inside libzim (default: off).

Args:
Expand All @@ -297,7 +402,7 @@ cdef class _Creator:
self.c_creator.configVerbose(verbose)
return self

def config_compression(self, compression: Compression) -> Creator:
def config_compression(self, compression: Compression) -> _Creator:
"""Set compression algorithm to use.

Check libzim for default setting. (Fall 2021 default: zstd).
Expand All @@ -313,7 +418,7 @@ cdef class _Creator:
self.c_creator.configCompression(zim.comp_from_int(compression.value))
return self

def config_clustersize(self, int size: pyint) -> Creator:
def config_clustersize(self, int size: pyint) -> _Creator:
"""Set size of created clusters.

Check libzim for default setting. (Fall 2021 default: 2Mib).
Expand All @@ -332,7 +437,7 @@ cdef class _Creator:
self.c_creator.configClusterSize(size)
return self

def config_indexing(self, bool indexing: bool, str language: str) -> Creator:
def config_indexing(self, bool indexing: bool, str language: str) -> _Creator:
"""Configures the full-text indexing feature.

Args:
Expand All @@ -347,7 +452,7 @@ cdef class _Creator:
self.c_creator.configIndexing(indexing, language.encode('UTF-8'))
return self

def config_nbworkers(self, int nbWorkers: pyint) -> Creator:
def config_nbworkers(self, int nbWorkers: pyint) -> _Creator:
"""Configures the number of threads to use for internal workers (default: 4).

Args:
Expand All @@ -361,7 +466,7 @@ cdef class _Creator:
self.c_creator.configNbWorkers(nbWorkers)
return self

def set_mainpath(self, str mainPath: str) -> Creator:
def set_mainpath(self, str mainPath: str) -> _Creator:
"""Set path of the main entry.

Args:
Expand All @@ -388,7 +493,7 @@ cdef class _Creator:
cdef string _content = content
self.c_creator.addIllustration(size, _content)

# def set_uuid(self, uuid) -> Creator:
# def set_uuid(self, uuid) -> _Creator:
# self.c_creator.setUuid(uuid)

def add_item(self, writer_item not None: BaseWritingItem):
Expand Down Expand Up @@ -503,49 +608,6 @@ cdef class _Creator:
"""
return self._filename

class ContentProvider:
    """ABC in charge of providing the content to add in the archive to the Creator.

    Subclasses must implement `get_size()` and either `gen_blob()` (the default
    `feed()` drains it one blob per call) or `feed()` itself.
    """
    __module__ = writer_module_name

    def __init__(self):
        # Created lazily from `gen_blob()` on the first call to `feed()`.
        self.generator = None

    def get_size(self) -> pyint:
        """Total size, in bytes, of the content delivered through `feed()`.

        Returns:
            int: The size of the data in bytes.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError("get_size must be implemented.")

    def feed(self) -> WritingBlob:
        """Blob(s) containing the complete content of the article.

        Each call returns the next blob produced by `gen_blob()`. Once the
        generator is exhausted, an empty blob is returned to tell the writer
        that no more content has to be written.
        Sum(size(blobs)) must be equal to `self.get_size()`.

        Returns:
            WritingBlob: The next content blob of the article, or an empty
                blob when there is nothing left.
        """
        if self.generator is None:
            self.generator = self.gen_blob()

        try:
            # Keep a reference to the blob on the instance so that it is not
            # garbage-collected while the C++ side is still using it.
            self._blob = next(self.generator)
        except StopIteration:
            self._blob = WritingBlob("")

        return self._blob

    def gen_blob(self) -> Generator[WritingBlob, None, None]:
        """Generator yielding blobs for the content of the article.

        Yields:
            WritingBlob: A blob containing part of the article content.

        Raises:
            NotImplementedError: Always; subclasses must override this method
                (or override `feed()` instead).
        """
        raise NotImplementedError("gen_blob (or feed) must be implemented")


class StringProvider(ContentProvider):
"""ContentProvider for a single encoded-or-not UTF-8 string."""
Expand Down Expand Up @@ -644,69 +706,6 @@ class IndexData:
return None


class BaseWritingItem:
    """
    Data to be added to the archive.

    Stub meant to be subclassed: override the `get_*` methods and pass an
    instance of the subclass to `Creator.add_item()`.
    """
    __module__ = writer_module_name

    def __init__(self):
        self._blob = None
        # NOTE(review): this only binds a local name and has no effect on the
        # instance; it may have been intended as a class attribute -- confirm.
        get_indexdata = None

    def get_path(self) -> str:
        """Return the full path of the item.

        The path must be absolute and unique.

        Returns:
            Path of the item.
        """
        raise NotImplementedError("get_path must be implemented.")

    def get_title(self) -> str:
        """Return the item title, which might be indexed and used in suggestions.

        Returns:
            Title of the item.
        """
        raise NotImplementedError("get_title must be implemented.")

    def get_mimetype(self) -> str:
        """Return the MIME-type of the item's content.

        Returns:
            Mimetype of the item.
        """
        raise NotImplementedError("get_mimetype must be implemented.")

    def get_contentprovider(self) -> ContentProvider:
        """Return the ContentProvider holding the complete content of the item.

        Returns:
            The content provider of the item.
        """
        raise NotImplementedError("get_contentprovider must be implemented.")

    def get_hints(self) -> Dict[Hint, pyint]:
        """Return the Hints that help the Creator decide how to handle this item.

        Hints affect compression, presence in suggestion, random and search.

        Returns:
            Hints to help the Creator decide how to handle this item.
        """
        raise NotImplementedError("get_hints must be implemented.")

    def __repr__(self) -> str:
        # Evaluate the parts in the same order as they appear in the output.
        cls_name = self.__class__.__name__
        path = self.get_path()
        title = self.get_title()
        return f"{cls_name}(path={path}, title={title})"


class Creator(_Creator):
"""Creator to create ZIM files."""
__module__ = writer_module_name
Expand Down
12 changes: 6 additions & 6 deletions libzim/zim.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ cdef extern from "zim/writer/creator.h" namespace "zim::writer":
void configClusterSize(int size)
void configIndexing(bint indexing, string language)
void configNbWorkers(int nbWorkers)
void startZimCreation(string filepath) nogil except +;
void addItem(shared_ptr[WriterItem] item) nogil except +
void addMetadata(string name, string content, string mimetype) nogil except +
void addRedirection(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) nogil except +
void startZimCreation(string filepath) except + nogil
void addItem(shared_ptr[WriterItem] item) except + nogil
void addMetadata(string name, string content, string mimetype) except + nogil
void addRedirection(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil
void addAlias(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil
void finishZimCreation() nogil except +
void finishZimCreation() except + nogil
void setMainPath(string mainPath)
void addIllustration(unsigned int size, string content) nogil except +
void addIllustration(unsigned int size, string content) except + nogil

cdef extern from "zim/search.h" namespace "zim":
cdef cppclass Query:
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ authors = [
{name = "openZIM", email = "[email protected]"},
]
readme = "README.md"
license = {text = "GPL-3.0-or-later"}
license = "GPL-3.0-or-later"
classifiers = [
"Development Status :: 5 - Production/Stable",
"Topic :: Utilities",
Expand All @@ -36,7 +36,6 @@ classifiers = [
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Typing :: Stubs Only",
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Operating System :: MacOS",
"Operating System :: POSIX",
]
Expand Down
Loading