""" libzim writer module
    - Creator to create ZIM files
    - Article to store ZIM articles metadata
    - Blob to store ZIM article content
    Usage:
    with Creator(pathlib.Path("myfile.zim"), main_page="welcome.html") as zf:
        article = MyArticleSubclass(
            url="A/welcome.html",
            title="My Title",
            content=Blob("My content"))
        zf.add_article(article)
        zf.update_metadata(tags="new;demo") """
13+
# This file is part of python-libzim
# (see https://github.com/libzim/python-libzim)
#
1932
2033import pathlib
2134import datetime
22- from collections import defaultdict
35+ import collections
2336
2437from .wrapper import Creator as _Creator
2538from .wrapper import WritingBlob as Blob
2841
2942
class Article:
    """ Article stub to override

        Pass a subclass of it to Creator.add_article().
        Every getter below raises NotImplementedError until overridden. """

    def __init__(self):
        # Cache for the Blob returned by get_data(); filled lazily by _get_data()
        self._blob = None

    def get_url(self) -> str:
        """ Full URL of article including namespace """
        raise NotImplementedError

    def get_title(self) -> str:
        """ Article title. Might be indexed and used in suggestions """
        raise NotImplementedError

    def is_redirect(self) -> bool:
        """ Whether this redirects to another article (cf. redirect_url) """
        raise NotImplementedError

    def get_mime_type(self) -> str:
        """ MIME-type of the article's content. A/ namespace reserved to text/html """
        raise NotImplementedError

    def get_filename(self) -> str:
        """ Filename to get content from. Blank string "" if not used """
        raise NotImplementedError

    def should_compress(self) -> bool:
        """ Whether the article's content should be compressed or not """
        raise NotImplementedError

    def should_index(self) -> bool:
        """ Whether the article's content should be indexed or not """
        raise NotImplementedError

    def redirect_url(self) -> str:
        """ Full URL including namespace of another article """
        raise NotImplementedError

    def _get_data(self) -> "Blob":
        """ Internal data-retrieval with a cache to the content's pointer

            get_data() is called at most once per instance; its result is kept
            on the instance and handed back on every later call.
            You don't need to override this """
        if self._blob is None:
            self._blob = self.get_data()
        return self._blob

    def get_data(self) -> "Blob":
        """ Blob containing the complete content of the article """
        raise NotImplementedError

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(url={self.get_url()}, title={self.get_title()})"
6897
6998
class MetadataArticle(Article):
    """ Simple Article sub-class for key-value articles on M/ metadata namespace """

    def __init__(self, url: str, metadata_content: str):
        """ Stores the metadata key and its value

            Parameters
            ----------
            url : metadata key; get_url() prefixes it with the M/ namespace
            metadata_content : metadata value, wrapped in a Blob by get_data() """
        super().__init__()
        self.url = url
        self.metadata_content = metadata_content

    def is_redirect(self) -> bool:
        return False

    def get_url(self) -> str:
        return f"M/{self.url}"

    def get_title(self) -> str:
        return ""

    def get_mime_type(self) -> str:
        return "text/plain"

    def get_filename(self) -> str:
        return ""

    def should_compress(self) -> bool:
        return True

    def should_index(self) -> bool:
        return False

    def get_data(self) -> "Blob":
        return Blob(self.metadata_content)
99130
100131
def pascalize(keyword: str) -> str:
    """ Converts python case to pascal case.

        example: long_description -> LongDescription """
    # str.title() capitalizes each underscore-separated word (and lowercases
    # the rest); dropping the underscores then glues the words together.
    return "".join(keyword.title().split("_"))
104-
105-
class Creator:
    """ Zim Creator.

        Usable as a context manager: leaving the `with` block closes the Creator.

        Attributes
        ----------
        *_creatorWrapper : wrapper.ZimCreatorWrapper
            a pointer to the C++ Creator object wrapper
        filename : pathlib.Path
            Zim file path
        main_page : str
            Zim file main page (without namespace)
        language : str
            Zim file Index language
        _article_counter
            Zim file article counter (non-redirect articles per MIME-type)
        _metadata
            Zim file metadata
        _closed : bool
            Whether close() already finalized the file """

    def __init__(
        self, filename: pathlib.Path, main_page: str, index_language: str = "eng", min_chunk_size: int = 2048,
    ):
        """ Creates a ZIM Creator

            Parameters
            ----------
            filename : Path to create the ZIM file at
            main_page: ZIM file main article URL (without namespace, must be in A/)
            index_language: content language to inform indexer with (ISO-639-3)
            min_chunk_size: minimum size of chunks for compression """

        self._creatorWrapper = _Creator(str(filename), main_page, index_language, min_chunk_size)
        # Set right after the wrapper exists: from here on __del__ may safely
        # call close(). If the line above raised, _closed is never defined and
        # __del__ treats the instance as having nothing to finalize.
        self._closed = False
        self.filename = pathlib.Path(filename)
        self.main_page = main_page
        self.language = index_language
        self._metadata = {}
        self._article_counter = collections.defaultdict(int)
        self.update_metadata(date=datetime.date.today(), language=index_language)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # An explicit close() inside the with-block must not make leaving the
        # block raise "Creator already closed".
        if not self._closed:
            self.close()

    def __del__(self):
        # getattr default covers instances whose __init__ failed before the
        # wrapper was created: there is nothing to finalize in that case.
        if not getattr(self, "_closed", True):
            self.close()

    def add_article(self, article: "Article"):
        """ Adds an article to the Creator.

            Parameters
            ----------
            article : Zim writer Article
                The article to add to the file
            Raises
            ------
            RuntimeError
                If the ZimCreator was already finalized """
        self._creatorWrapper.add_article(article)
        if not article.is_redirect():
            # update article counter (redirects are not counted)
            self._article_counter[article.get_mime_type().strip()] += 1

    def update_metadata(self, **kwargs: str):
        """ Updates Creator metadata for ZIM, supplied as keyword arguments

            Keyword names are converted to pascal case before being stored
            (long_description -> LongDescription). A `date` value may also be
            a datetime.date; close() formats it as YYYY-MM-DD. """

        def pascalize(keyword: str):
            """ Converts python case to pascal case.

                example: long_description -> LongDescription """
            return "".join(keyword.title().split("_"))

        self._metadata.update({pascalize(k): v for k, v in kwargs.items()})

    def close(self):
        """ Finalizes and writes added articles to the file

            Raises
            ------
            RuntimeError
                If the ZimCreator was already finalized """
        if self._closed:
            raise RuntimeError("Creator already closed")

        # Store _metadata dict as MetadataArticle
        for key, value in self._metadata.items():
            # datetime.datetime is a subclass of datetime.date, so one check covers both
            if key == "Date" and isinstance(value, datetime.date):
                value = value.strftime("%Y-%m-%d")
            self._creatorWrapper.add_article(MetadataArticle(key, value))

        # Counter metadata: "mime=count" pairs joined with ";"
        counter_str = ";".join(f"{k}={v}" for k, v in self._article_counter.items())
        self._creatorWrapper.add_article(MetadataArticle("Counter", counter_str))

        self._creatorWrapper.finalize()
        self._closed = True

    def __repr__(self) -> str:
        return f"Creator(filename={self.filename})"
0 commit comments