Skip to content

Commit 462b98f

Browse files
committed
Redefine submodule libzim.writer.
Dynamically define the module `libzim.writer`. - Move all code from `writer.py` into `wrapper.pyx`. This is a plain copy; we could do better by merging `_Creator` and `Creator`, but that will be done in a later commit. - Dynamically declare the submodule.
1 parent 7f0be5f commit 462b98f

File tree

2 files changed

+173
-171
lines changed

2 files changed

+173
-171
lines changed

libzim/wrapper.pyx

Lines changed: 173 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,12 @@ from libcpp.memory cimport shared_ptr
3333
from libcpp.map cimport map
3434
from libcpp.utility cimport move
3535

36+
from typing import Dict, Union
37+
import datetime
3638
import pathlib
3739
import traceback
38-
40+
from types import ModuleType
41+
import sys
3942

4043
pybool = type(True)
4144

@@ -127,7 +130,10 @@ cdef public api:
127130
#  Creator module #
128131
###############################################################################
129132

133+
writer_module_name = f"{__name__}.writer"
134+
130135
cdef class WritingBlob:
136+
__module__ = writer_module_name
131137
cdef zim.Blob c_blob
132138
cdef bytes ref_content
133139

@@ -145,18 +151,20 @@ cdef class WritingBlob:
145151

146152
class Compression(enum.Enum):
    """Compression algorithms that can be selected when creating ZIM files."""
    # Each member wraps the corresponding zim.CompressionType value.
    none = zim.CompressionType.zimcompNone
    lzma = zim.CompressionType.zimcompLzma
    zstd = zim.CompressionType.zimcompZstd
    # Report the class as belonging to the `<package>.writer` module name
    # rather than to the module this code is compiled into.
    __module__ = writer_module_name
151158

152159

153160
class Hint(enum.Enum):
    """Hint keys; each member wraps the matching zim.HintKeys value."""
    COMPRESS = zim.HintKeys.COMPRESS
    FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE
    # Report the class as belonging to the `<package>.writer` module name
    # rather than to the module this code is compiled into.
    __module__ = writer_module_name
156164

157165

158166

159-
cdef class Creator:
167+
cdef class _Creator:
160168
""" Zim Creator
161169
162170
Attributes
@@ -167,6 +175,7 @@ cdef class Creator:
167175
path to create the ZIM file at
168176
_started : bool
169177
flag if the creator has started """
178+
__module__ = writer_module_name
170179

171180
cdef zim.ZimCreator c_creator
172181
cdef object _filename
@@ -288,6 +297,168 @@ cdef class Creator:
288297
def filename(self):
289298
return self._filename
290299

300+
class ContentProvider:
    """Base class handing an item's content to the writer, blob by blob.

    Subclasses must implement `get_size()` and either `gen_blob()` (a
    generator of blobs, consumed by the default `feed()`) or override
    `feed()` itself.
    """
    __module__ = writer_module_name

    def __init__(self):
        # Generator returned by gen_blob(); created lazily on the first feed().
        self.generator = None
        # Last blob returned by feed(). Initialised here so the attribute
        # exists even before feed() has been called.
        self._blob = None

    def get_size(self) -> int:
        """Total size, in bytes, of the content delivered through `feed()`."""
        raise NotImplementedError("get_size must be implemented.")

    def feed(self) -> WritingBlob:
        """Return the next blob of the article's content.

        Must return an empty blob to tell the writer that no more content
        has to be written. The sum of the sizes of all returned blobs must
        be equal to `self.get_size()`.
        """
        if self.generator is None:
            self.generator = self.gen_blob()

        try:
            # Keep a reference to the blob on the instance so that the
            # garbage collector does not delete it while the C++ side is
            # still using it.
            self._blob = next(self.generator)
        except StopIteration:
            # Generator exhausted: signal the end with an empty blob.
            self._blob = WritingBlob("")

        return self._blob

    def gen_blob(self):
        """Generator yielding blobs for the content of the article."""
        raise NotImplementedError("gen_blob (or feed) must be implemented")
329+
330+
331+
class StringProvider(ContentProvider):
    """ContentProvider serving one in-memory str or bytes value."""
    __module__ = writer_module_name

    def __init__(self, content):
        super().__init__()
        # A str is stored as its UTF-8 encoding; any other value is kept as is.
        if isinstance(content, str):
            content = content.encode("UTF-8")
        self.content = content

    def get_size(self):
        """Return the length of the stored content."""
        stored = self.content
        return len(stored)

    def gen_blob(self):
        """Yield the whole stored content as a single WritingBlob."""
        blob = WritingBlob(self.content)
        yield blob
342+
343+
344+
class FileProvider(ContentProvider):
    """ContentProvider reading its content from a file, chunk by chunk."""
    __module__ = writer_module_name

    def __init__(self, filepath):
        super().__init__()
        self.filepath = filepath
        # The size is read once, when the provider is constructed.
        self.size = os.path.getsize(self.filepath)

    def get_size(self):
        """Return the file size recorded at construction time."""
        return self.size

    def gen_blob(self):
        """Yield the file's bytes as WritingBlobs of at most 1 MiB each."""
        chunk_size = 1048576  # 1MiB chunk
        with open(self.filepath, "rb") as stream:
            while True:
                chunk = stream.read(chunk_size)
                if not chunk:
                    # An empty read means the end of the file was reached.
                    break
                yield WritingBlob(chunk)
361+
362+
363+
class BaseWritingItem:
    """Stub item meant to be subclassed.

    Every `get_*` method raises NotImplementedError until overridden.
    Pass an instance of a subclass to Creator.add_item()."""
    __module__ = writer_module_name

    def __init__(self):
        self._blob = None

    def get_path(self) -> str:
        """Return the full path of the item."""
        raise NotImplementedError("get_path must be implemented.")

    def get_title(self) -> str:
        """Return the item title. Might be indexed and used in suggestions."""
        raise NotImplementedError("get_title must be implemented.")

    def get_mimetype(self) -> str:
        """Return the MIME-type of the item's content."""
        raise NotImplementedError("get_mimetype must be implemented.")

    def get_contentprovider(self) -> ContentProvider:
        """Return the ContentProvider holding the item's complete content."""
        raise NotImplementedError("get_contentprovider must be implemented.")

    def get_hints(self) -> Dict[Hint, int]:
        """Return a mapping from Hint to int for this item."""
        raise NotImplementedError("get_hints must be implemented.")

    def __repr__(self) -> str:
        # Built from the concrete class name plus the item's path and title.
        class_name = self.__class__.__name__
        path = self.get_path()
        title = self.get_title()
        return f"{class_name}(path={path}, title={title})"
396+
397+
398+
def pascalize(keyword: str):
    """Turn a python-case (snake_case) keyword into pascal case.

    The keyword is title-cased first, then every underscore is dropped.
    example: long_description -> LongDescription"""
    title_cased = keyword.title()
    return title_cased.replace("_", "")
402+
403+
404+
class Creator(_Creator):
    """Python-level layer over `_Creator` that normalises arguments."""
    __module__ = writer_module_name

    def config_compression(self, compression: Compression):
        """Forward the compression setting to `_Creator`.

        A value that is not a Compression member is lower-cased and looked
        up by name on the Compression enum."""
        if isinstance(compression, Compression):
            chosen = compression
        else:
            chosen = getattr(Compression, compression.lower())
        return super().config_compression(chosen)

    def add_metadata(
        self, name: str, content: Union[str, bytes, datetime.date, datetime.datetime]
    ):
        """Add a metadata entry after normalising its name and content.

        The name is pascalized. For the "Date" entry, date/datetime values
        are formatted as YYYY-MM-DD; str content is encoded as UTF-8; any
        other content is passed through untouched."""
        metadata_name = pascalize(name)
        is_date_value = isinstance(content, (datetime.date, datetime.datetime))
        if metadata_name == "Date" and is_date_value:
            payload = content.strftime("%Y-%m-%d").encode("UTF-8")
        elif isinstance(content, str):
            payload = content.encode("UTF-8")
        else:
            payload = content
        super().add_metadata(name=metadata_name, content=payload)

    def __repr__(self) -> str:
        return "Creator(filename={})".format(self.filename)
423+
424+
# Docstring attached to the dynamically created `writer` submodule.
# NOTE(review): the usage example calls camelCase methods (configVerbose,
# setMainPath) while the Creator methods visible here are snake_case; confirm
# these names against the _Creator API.
writer_module_doc = """ libzim writer module
    - Creator to create ZIM files
    - Item to store ZIM articles metadata
    - ContentProvider to store an Item's content
    - Blob to store actual content

    Usage:
        with Creator(pathlib.Path("myfile.zim")) as creator:
            creator.configVerbose(False)
            creator.add_metadata("Name", b"my name")
            # example
            creator.add_item(MyItemSubclass(path, title, mimetype, content))
            creator.setMainPath(path)"""

# Public API of the submodule. A bare object is exported under its own
# __name__; a (public_name, object) tuple exports the object under an alias.
writer_public_objects = [
    Creator,
    Compression,
    ('Blob', WritingBlob),
    Hint,
    ('Item', BaseWritingItem),
    ContentProvider,
    FileProvider,
    StringProvider,
    pascalize
]

# Build the module object by hand and fill it with the public objects.
writer = ModuleType(writer_module_name, writer_module_doc)
_all = []
for obj in writer_public_objects:
    if isinstance(obj, tuple):
        name, obj = obj
    else:
        name = obj.__name__
    setattr(writer, name, obj)
    _all.append(name)
writer.__all__ = _all

# Register the module in sys.modules under its dotted name so the import
# system can find it there.
sys.modules[writer_module_name] = writer
460+
461+
291462
###############################################################################
292463
#  Reader module #
293464
###############################################################################

libzim/writer.py

Lines changed: 0 additions & 169 deletions
This file was deleted.

0 commit comments

Comments
 (0)