From d6d89a7065c5cbe89ec442406ec2837b8a8058af Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:50:42 +0000 Subject: [PATCH 1/6] Move BaseWritingItem so that it is available in _Creator --- libzim/libzim.pyx | 124 +++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index 4744829..d14a507 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -254,6 +254,67 @@ class Hint(enum.Enum): FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE +class BaseWritingItem: + """ + Data to be added to the archive. + + This is a stub to override. Pass a subclass of it to `Creator.add_item()` + """ + __module__ = writer_module_name + + def __init__(self): + self._blob = None + get_indexdata = None + + def get_path(self) -> str: + """Full path of item. + + The path must be absolute and unique. + + Returns: + Path of the item. + """ + raise NotImplementedError("get_path must be implemented.") + + def get_title(self) -> str: + """Item title. Might be indexed and used in suggestions. + + Returns: + Title of the item. + """ + raise NotImplementedError("get_title must be implemented.") + + def get_mimetype(self) -> str: + """MIME-type of the item's content. + + Returns: + Mimetype of the item. + """ + raise NotImplementedError("get_mimetype must be implemented.") + + def get_contentprovider(self) -> ContentProvider: + """ContentProvider containing the complete content of the item. + + Returns: + The content provider of the item. + """ + raise NotImplementedError("get_contentprovider must be implemented.") + + def get_hints(self) -> Dict[Hint, pyint]: + """Get the Hints that help the Creator decide how to handle this item. + + Hints affects compression, presence in suggestion, random and search. + + Returns: + Hints to help the Creator decide how to handle this item. 
+ """ + raise NotImplementedError("get_hints must be implemented.") + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(path={self.get_path()}, " + f"title={self.get_title()})" + ) cdef class _Creator: """ZIM Creator. @@ -644,69 +705,6 @@ class IndexData: return None -class BaseWritingItem: - """ - Data to be added to the archive. - - This is a stub to override. Pass a subclass of it to `Creator.add_item()` - """ - __module__ = writer_module_name - - def __init__(self): - self._blob = None - get_indexdata = None - - def get_path(self) -> str: - """Full path of item. - - The path must be absolute and unique. - - Returns: - Path of the item. - """ - raise NotImplementedError("get_path must be implemented.") - - def get_title(self) -> str: - """Item title. Might be indexed and used in suggestions. - - Returns: - Title of the item. - """ - raise NotImplementedError("get_title must be implemented.") - - def get_mimetype(self) -> str: - """MIME-type of the item's content. - - Returns: - Mimetype of the item. - """ - raise NotImplementedError("get_mimetype must be implemented.") - - def get_contentprovider(self) -> ContentProvider: - """ContentProvider containing the complete content of the item. - - Returns: - The content provider of the item. - """ - raise NotImplementedError("get_contentprovider must be implemented.") - - def get_hints(self) -> Dict[Hint, pyint]: - """Get the Hints that help the Creator decide how to handle this item. - - Hints affects compression, presence in suggestion, random and search. - - Returns: - Hints to help the Creator decide how to handle this item. 
- """ - raise NotImplementedError("get_hints must be implemented.") - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}(path={self.get_path()}, " - f"title={self.get_title()})" - ) - - class Creator(_Creator): """Creator to create ZIM files.""" __module__ = writer_module_name From a84261205b0de4e8c01759d649e9479b6e2ea104 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:06 +0000 Subject: [PATCH 2/6] Fix _Creator return types --- libzim/libzim.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index d14a507..598e0ba 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -344,7 +344,7 @@ cdef class _Creator: def __init__(self, filename: pathlib.Path): pass - def config_verbose(self, bool verbose: bool) -> Creator: + def config_verbose(self, bool verbose: bool) -> _Creator: """Set creator verbosity inside libzim (default: off). Args: @@ -358,7 +358,7 @@ cdef class _Creator: self.c_creator.configVerbose(verbose) return self - def config_compression(self, compression: Compression) -> Creator: + def config_compression(self, compression: Compression) -> _Creator: """Set compression algorithm to use. Check libzim for default setting. (Fall 2021 default: zstd). @@ -374,7 +374,7 @@ cdef class _Creator: self.c_creator.configCompression(zim.comp_from_int(compression.value)) return self - def config_clustersize(self, int size: pyint) -> Creator: + def config_clustersize(self, int size: pyint) -> _Creator: """Set size of created clusters. Check libzim for default setting. (Fall 2021 default: 2Mib). @@ -393,7 +393,7 @@ cdef class _Creator: self.c_creator.configClusterSize(size) return self - def config_indexing(self, bool indexing: bool, str language: str) -> Creator: + def config_indexing(self, bool indexing: bool, str language: str) -> _Creator: """Configures the full-text indexing feature. 
Args: @@ -408,7 +408,7 @@ cdef class _Creator: self.c_creator.configIndexing(indexing, language.encode('UTF-8')) return self - def config_nbworkers(self, int nbWorkers: pyint) -> Creator: + def config_nbworkers(self, int nbWorkers: pyint) -> _Creator: """Configures the number of threads to use for internal workers (default: 4). Args: @@ -422,7 +422,7 @@ cdef class _Creator: self.c_creator.configNbWorkers(nbWorkers) return self - def set_mainpath(self, str mainPath: str) -> Creator: + def set_mainpath(self, str mainPath: str) -> _Creator: """Set path of the main entry. Args: @@ -449,7 +449,7 @@ cdef class _Creator: cdef string _content = content self.c_creator.addIllustration(size, _content) -# def set_uuid(self, uuid) -> Creator: +# def set_uuid(self, uuid) -> _Creator: # self.c_creator.setUuid(uuid) def add_item(self, writer_item not None: BaseWritingItem): From 7c10d1b7a07c164877b706e8a76f019458eb0266 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:30 +0000 Subject: [PATCH 3/6] Fix nogil position, must be last --- libzim/zim.pxd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libzim/zim.pxd b/libzim/zim.pxd index 3ab094e..bbd25f4 100644 --- a/libzim/zim.pxd +++ b/libzim/zim.pxd @@ -66,14 +66,14 @@ cdef extern from "zim/writer/creator.h" namespace "zim::writer": void configClusterSize(int size) void configIndexing(bint indexing, string language) void configNbWorkers(int nbWorkers) - void startZimCreation(string filepath) nogil except +; - void addItem(shared_ptr[WriterItem] item) nogil except + - void addMetadata(string name, string content, string mimetype) nogil except + - void addRedirection(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) nogil except + + void startZimCreation(string filepath) except + nogil + void addItem(shared_ptr[WriterItem] item) except + nogil + void addMetadata(string name, string content, string mimetype) except + nogil + void addRedirection(string path, 
string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil void addAlias(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil - void finishZimCreation() nogil except + + void finishZimCreation() except + nogil void setMainPath(string mainPath) - void addIllustration(unsigned int size, string content) nogil except + + void addIllustration(unsigned int size, string content) except + nogil cdef extern from "zim/search.h" namespace "zim": cdef cppclass Query: From 80f37629f749e88398aeafb3a511eeb16511c2ef Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:50 +0000 Subject: [PATCH 4/6] Use new format for license in pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 65a9712..efb0d99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ {name = "openZIM", email = "dev@kiwix.org"}, ] readme = "README.md" -license = {text = "GPL-3.0-or-later"} +license = "GPL-3.0-or-later" classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Utilities", @@ -36,7 +36,6 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Typing :: Stubs Only", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: MacOS", "Operating System :: POSIX", ] From 9c2aaec18cfb74bbd0f1b5181d8336111a94ae58 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:54:35 +0000 Subject: [PATCH 5/6] Move ContentProvider --- libzim/libzim.pyx | 87 ++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index 598e0ba..e2ae38a 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -254,6 +254,50 @@ class Hint(enum.Enum): FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE +class ContentProvider: + """ABC in charge of providing the 
content to add in the archive to the Creator.""" + __module__ = writer_module_name + def __init__(self): + self.generator = None + + def get_size(self) -> pyint: + """Size of `get_data`'s result in bytes. + + Returns: + int: The size of the data in bytes. + """ + raise NotImplementedError("get_size must be implemented.") + + def feed(self) -> WritingBlob: + """Blob(s) containing the complete content of the article. + + Must return an empty blob to tell writer no more content has to be written. + Sum(size(blobs)) must be equals to `self.get_size()` + + Returns: + WritingBlob: The content blob(s) of the article. + """ + if self.generator is None: + self.generator = self.gen_blob() + + try: + # We have to keep a ref to _blob to be sure gc do not del it while cpp is + # using it + self._blob = next(self.generator) + except StopIteration: + self._blob = WritingBlob("") + + return self._blob + + def gen_blob(self) -> Generator[WritingBlob, None, None]: + """Generator yielding blobs for the content of the article. + + Yields: + WritingBlob: A blob containing part of the article content. + """ + raise NotImplementedError("gen_blob (ro feed) must be implemented") + + class BaseWritingItem: """ Data to be added to the archive. @@ -564,49 +608,6 @@ cdef class _Creator: """ return self._filename -class ContentProvider: - """ABC in charge of providing the content to add in the archive to the Creator.""" - __module__ = writer_module_name - def __init__(self): - self.generator = None - - def get_size(self) -> pyint: - """Size of `get_data`'s result in bytes. - - Returns: - int: The size of the data in bytes. - """ - raise NotImplementedError("get_size must be implemented.") - - def feed(self) -> WritingBlob: - """Blob(s) containing the complete content of the article. - - Must return an empty blob to tell writer no more content has to be written. - Sum(size(blobs)) must be equals to `self.get_size()` - - Returns: - WritingBlob: The content blob(s) of the article. 
- """ - if self.generator is None: - self.generator = self.gen_blob() - - try: - # We have to keep a ref to _blob to be sure gc do not del it while cpp is - # using it - self._blob = next(self.generator) - except StopIteration: - self._blob = WritingBlob("") - - return self._blob - - def gen_blob(self) -> Generator[WritingBlob, None, None]: - """Generator yielding blobs for the content of the article. - - Yields: - WritingBlob: A blob containing part of the article content. - """ - raise NotImplementedError("gen_blob (ro feed) must be implemented") - class StringProvider(ContentProvider): """ContentProvider for a single encoded-or-not UTF-8 string.""" From dbfc2f9ed05bc7c9243f0d212091d5d04785e223 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:55:17 +0000 Subject: [PATCH 6/6] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb9c05c..d0bfc2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for free-threaded CPython (3.13 and 3.14) - Upgrade Github CI Actions - Run tests on minimum supported platforms + more recent stable ones +- Fix various Cython warning and deprecation notices (#239) ## [3.7.0] - 2025-04-18