@@ -132,8 +132,24 @@ cdef public api:
132132# ZimCreator #
133133# ########################
134134
135- # TODO Should we declare an article for metadata or left to the user managing ?
136-
135+ # TODO Write metadata
136+
# Metadata keys that ZimCreator treats as mandatory.
MANDATORY_METADATA_KEYS = [
    "Name",
    "Title",
    "Creator",
    "Publisher",
    "Date",
    "Description",
    "Language",
]
# Optional metadata keys (listed for reference only):
#   "LongDescription", "Licence", "Tags", "Flavour", "Source", "Counter",
#   "Scraper"
137153
138154cdef class ZimCreator:
139155 """
@@ -153,6 +169,10 @@ cdef class ZimCreator:
153169 Zim file Index language
154170 _min_chunk_size : str
155171 Zim file minimum chunk size
172+ _article_counter
173+ Zim file article counter
174+ _metadata
175+ Zim file metadata
156176 """
157177
158178 cdef ZimCreatorWrapper * c_creator
@@ -162,15 +182,16 @@ cdef class ZimCreator:
162182 cdef object _index_language
163183 cdef object _min_chunk_size
164184 cdef object _article_counter
185+ cdef object _metadata
165186
166- def __cinit__ (self , str filename , str main_page = " " , str index_language = " eng" , min_chunk_size = 2048 ):
187+ def __init__ (self , str filename , str main_page = " " , str index_language = " eng" , min_chunk_size = 2048 ):
167188 """ Constructs a ZimCreator from parameters.
168189 Parameters
169190 ----------
170191 filename : str
171192 Zim file path
172193 main_page : str
173- Zim file main_page
194+ Zim file main page
174195 index_language : str
175196 Zim file index language (default eng)
176197 min_chunk_size : int
@@ -183,8 +204,10 @@ cdef class ZimCreator:
183204 self ._main_page = self .c_creator.getMainUrl().getLongUrl().decode(" UTF-8" , " strict" )
184205 self ._index_language = index_language
185206 self ._min_chunk_size = min_chunk_size
207+ self ._metadata = {k:None for k in MANDATORY_METADATA_KEYS}
186208
187209 self ._article_counter = defaultdict(int )
210+ self .update_metadata(date = datetime.date.today(), language = index_language)
188211
189212
190213 @property
@@ -216,9 +239,38 @@ cdef class ZimCreator:
216239 """ Get the minimum chunk size of the ZimCreator object"""
217240 return self ._min_chunk_size
218241
219- def _update_article_counter (self , ZimArticle article ):
def get_article_counter_string(self):
    """Serialise the article counter as ``key=count`` pairs joined by ``;``.

    Returns
    -------
    str
        One ``key=count`` entry per counter item, in the counter's
        iteration order; an empty string when the counter is empty.
    """
    entries = []
    for key, count in self._article_counter.items():
        entries.append("%s=%s" % (key, count))
    return ";".join(entries)
244+
def _get_metadata(self):
    """Return the stored metadata together with the article counter.

    The result is a fresh dict, so adding the computed ``Counter`` entry
    does not modify ``self._metadata``.

    Returns
    -------
    dict
        Shallow copy of the stored metadata, plus a ``Counter`` entry when
        the article counter string is not empty.
    """
    # Copy first: this is a getter and must not write into instance state.
    metadata = dict(self._metadata)

    counter_string = self.get_article_counter_string()
    if counter_string:
        metadata["Counter"] = counter_string

    return metadata
253+
@property
def mandatory_metadata_ok(self):
    """Tell whether every mandatory metadata entry has a non-empty value.

    Returns
    -------
    bool
        True when each key of MANDATORY_METADATA_KEYS maps to a truthy
        value in the stored metadata, False otherwise.
    """
    complete = True
    # Deliberately no early exit: every mandatory key is looked up.
    for key in MANDATORY_METADATA_KEYS:
        if not self._metadata[key]:
            complete = False
    return complete
259+
def update_metadata(self, **kwargs):
    """Update the stored metadata from keyword arguments.

    Keyword names are converted from snake_case to PascalCase before being
    stored, e.g. ``long_description`` becomes ``LongDescription``.

    Parameters
    ----------
    **kwargs
        Metadata values keyed by snake_case name. A ``date`` given as a
        ``datetime.date`` is stored as a ``YYYY-MM-DD`` string; every other
        value is stored unchanged.
    """
    def pascalize(keyword):
        # "long_description" -> "Long_Description" -> "LongDescription"
        return "".join(keyword.title().split("_"))

    date = kwargs.get("date")
    if isinstance(date, datetime.date):
        kwargs["date"] = date.strftime("%Y-%m-%d")

    self._metadata.update(
        {pascalize(key): value for key, value in kwargs.items()}
    )
270+
def _update_article_counter(self, ZimArticle article not None):
    """Increment the counter entry for the article's MIME type."""
    # Surrounding whitespace is stripped before the value is used as a key.
    mime_type = article.get_mime_type().strip()
    # _article_counter is a defaultdict(int), so unseen keys start at 0.
    self._article_counter[mime_type] += 1
222274
223275 def add_article (self , ZimArticle article not None ):
224276 """ Add a ZimArticle to the Creator object.
@@ -247,7 +299,7 @@ cdef class ZimCreator:
247299 except :
248300 raise
249301 else :
250- if not article.is_redirect:
302+ if not article.is_redirect() :
251303 self ._update_article_counter(article)
252304
253305 def finalize (self ):
@@ -257,9 +309,13 @@ cdef class ZimCreator:
257309 ------
258310 RuntimeError
259311 If the ZimCreator was already finalized
312+ RuntimeError
313+ If mandatory metadata is missing
260314 """
261-
262315 if not self ._finalized:
316+ if not self .mandatory_metadata_ok:
317+ raise RuntimeError (" Mandatory metadata missing" )
318+
263319 self .c_creator.finalize()
264320 self ._finalized = True
265321 else :
0 commit comments