@@ -33,9 +33,12 @@ from libcpp.memory cimport shared_ptr
3333from libcpp.map cimport map
3434from libcpp.utility cimport move
3535
36+ from typing import Dict, Union
37+ import datetime
3638import pathlib
3739import traceback
38-
40+ from types import ModuleType
41+ import sys
3942
4043pybool = type (True )
4144
@@ -127,7 +130,10 @@ cdef public api:
127130# Creator module #
128131# ##############################################################################
129132
# Dotted name under which the synthetic writer sub-module is published.
# It is assigned to ``__module__`` on the public writer classes and used
# verbatim as the ``sys.modules`` key, so it must not carry any padding.
writer_module_name = f"{__name__}.writer"
134+
130135cdef class WritingBlob:
136+ __module__ = writer_module_name
131137 cdef zim.Blob c_blob
132138 cdef bytes ref_content
133139
@@ -145,18 +151,20 @@ cdef class WritingBlob:
145151
class Compression(enum.Enum):
    """Compression algorithms available to create ZIM files

    Each member wraps the matching ``zim.CompressionType`` constant."""

    # Present the class as belonging to the public writer module
    __module__ = writer_module_name

    none = zim.CompressionType.zimcompNone
    lzma = zim.CompressionType.zimcompLzma
    zstd = zim.CompressionType.zimcompZstd
151158
152159
class Hint(enum.Enum):
    """Hint keys, as used in the mapping returned by an item's get_hints()

    Each member wraps the matching ``zim.HintKeys`` constant."""

    # Present the class as belonging to the public writer module
    __module__ = writer_module_name

    COMPRESS = zim.HintKeys.COMPRESS
    FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE
156164
157165
158166
159- cdef class Creator :
167+ cdef class _Creator :
160168 """ Zim Creator
161169
162170 Attributes
@@ -167,6 +175,7 @@ cdef class Creator:
167175 path to create the ZIM file at
168176 _started : bool
169177 flag if the creator has started """
178+ __module__ = writer_module_name
170179
171180 cdef zim.ZimCreator c_creator
172181 cdef object _filename
@@ -288,6 +297,168 @@ cdef class Creator:
288297 def filename (self ):
289298 return self ._filename
290299
class ContentProvider:
    """Base class handing the content of an item to the writer, blob by blob

    Subclasses must implement get_size() and either gen_blob() or feed()."""
    __module__ = writer_module_name

    def __init__(self):
        # Lazily created on the first feed() call
        self.generator = None
        # Last blob handed out by feed(); see feed() for why it is kept
        self._blob = None

    def get_size(self) -> int:
        """Total size, in bytes, of the content delivered through feed()"""
        raise NotImplementedError("get_size must be implemented.")

    def feed(self) -> WritingBlob:
        """Blob(s) containing the complete content of the article.

        Must return an empty blob to tell writer no more content has to be
        written. Sum(size(blobs)) must be equal to `self.get_size()`
        """
        if self.generator is None:
            self.generator = self.gen_blob()

        try:
            # We have to keep a ref to _blob to be sure gc does not delete it
            # while cpp is using it
            self._blob = next(self.generator)
        except StopIteration:
            # Generator exhausted: the end-of-content marker must be a truly
            # empty blob, not a blob holding whitespace
            self._blob = WritingBlob("")

        return self._blob

    def gen_blob(self):
        """Generator yielding blobs for the content of the article"""
        raise NotImplementedError("gen_blob (or feed) must be implemented")
329+
330+
class StringProvider(ContentProvider):
    """ContentProvider serving in-memory content as one single blob

    `content` may be a str, which is stored UTF-8 encoded, or any other
    object, which is stored unchanged."""
    __module__ = writer_module_name

    def __init__(self, content):
        super().__init__()
        # Canonical codec name, so that lookup does not depend on the
        # normalisation of a padded encoding name
        if isinstance(content, str):
            content = content.encode("UTF-8")
        self.content = content

    def get_size(self):
        """Length of the stored content"""
        return len(self.content)

    def gen_blob(self):
        """Yield the whole content wrapped in a single WritingBlob"""
        yield WritingBlob(self.content)
342+
343+
class FileProvider(ContentProvider):
    """ContentProvider streaming the content of a file in fixed-size chunks

    The file size is read once, at construction time."""
    __module__ = writer_module_name

    def __init__(self, filepath):
        super().__init__()
        self.filepath = filepath
        self.size = os.path.getsize(self.filepath)

    def get_size(self):
        """Size of the file as measured when the provider was created"""
        return self.size

    def gen_blob(self):
        """Yield the file content as successive blobs of at most 1MiB"""
        bsize = 1048576  # 1MiB chunk
        # Binary mode: the mode string must be exactly "rb", any padding
        # makes open() raise ValueError
        with open(self.filepath, "rb") as fh:
            res = fh.read(bsize)
            while res:
                yield WritingBlob(res)
                res = fh.read(bsize)
361+
362+
class BaseWritingItem:
    """Item stub meant to be subclassed

    Hand an instance of a subclass to Creator.add_item(). Every getter below
    raises NotImplementedError until it is overridden."""
    __module__ = writer_module_name

    def __init__(self):
        self._blob = None

    def get_path(self) -> str:
        """Full path of the item"""
        raise NotImplementedError("get_path must be implemented.")

    def get_title(self) -> str:
        """Title of the item. Might be indexed and used in suggestions"""
        raise NotImplementedError("get_title must be implemented.")

    def get_mimetype(self) -> str:
        """MIME-type of the content of the item"""
        raise NotImplementedError("get_mimetype must be implemented.")

    def get_contentprovider(self) -> ContentProvider:
        """ContentProvider holding the complete content of the item"""
        raise NotImplementedError("get_contentprovider must be implemented.")

    def get_hints(self) -> Dict[Hint, int]:
        """Mapping of Hint keys to integer values for the item"""
        raise NotImplementedError("get_hints must be implemented.")

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        head = f"{cls_name}(path = {self.get_path()}, "
        tail = f"title = {self.get_title()})"
        return head + tail
396+
397+
def pascalize(keyword: str) -> str:
    """Converts python case to pascal case.

    Each underscore-separated word is title-cased and the underscores are
    dropped.

    example: long_description -> LongDescription"""
    # Join with the empty string and split on the bare underscore: any
    # padding in either literal leaves the underscores in place.
    return "".join(keyword.title().split("_"))
402+
403+
class Creator(_Creator):
    """Python-friendly front end of _Creator

    Accepts compression names as strings and normalises metadata names and
    values before delegating to the parent class."""
    __module__ = writer_module_name

    def config_compression(self, compression: Union[Compression, str]):
        """Set the compression, given a Compression member or its name

        A value that is not a Compression member is lower-cased and looked up
        by name on Compression (AttributeError if there is no such member)."""
        if not isinstance(compression, Compression):
            compression = getattr(Compression, compression.lower())
        return super().config_compression(compression)

    def add_metadata(
        self, name: str, content: Union[str, bytes, datetime.date, datetime.datetime]
    ):
        """Add a metadata entry, after normalising its name and content

        - name is converted with pascalize()
        - for the `Date` entry, date/datetime content becomes `YYYY-MM-DD`
        - str content is UTF-8 encoded"""
        name = pascalize(name)
        # Compare against the exact pascalized key and use an unpadded
        # format, otherwise dates are never converted (or wrongly formatted)
        if name == "Date" and isinstance(content, (datetime.date, datetime.datetime)):
            content = content.strftime("%Y-%m-%d").encode("UTF-8")
        if isinstance(content, str):
            content = content.encode("UTF-8")
        super().add_metadata(name=name, content=content)

    def __repr__(self) -> str:
        return f"Creator(filename = {self.filename})"
423+
# Docstring of the synthetic writer module built below
writer_module_doc = """ libzim writer module
- Creator to create ZIM files
- Item to store ZIM articles metadata
- ContentProvider to store an Item's content
- Blob to store actual content

Usage:
with Creator(pathlib.Path("myfile.zim")) as creator:
    creator.configVerbose(False)
    creator.add_metadata("Name", b"my name")
    # example
    creator.add_item(MyItemSubclass(path, title, mimetype, content))
    creator.setMainPath(path)"""

# Objects exposed by the writer module. A bare object is published under its
# own __name__; a (alias, object) tuple is published under the alias.
writer_public_objects = [
    Creator,
    Compression,
    ('Blob', WritingBlob),
    Hint,
    ('Item', BaseWritingItem),
    ContentProvider,
    FileProvider,
    StringProvider,
    pascalize
]

# Build the module object, fill it, and register it so that it can be
# imported under writer_module_name
writer = ModuleType(writer_module_name, writer_module_doc)
_all = []
for obj in writer_public_objects:
    if isinstance(obj, tuple):
        name = obj[0]
        obj = obj[1]
    else:
        name = obj.__name__
    setattr(writer, name, obj)
    _all.append(name)
writer.__all__ = _all
sys.modules[writer_module_name] = writer
460+
461+
291462# ##############################################################################
292463# Reader module #
293464# ##############################################################################
0 commit comments