Skip to content

Commit 34fd489

Browse files
jdcaballerov and mgautierfr
authored and committed
Add enforce mandatory metadata feature
1 parent f0bcad5 commit 34fd489

File tree

3 files changed

+73
-8
lines changed

3 files changed

+73
-8
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ zim_creator = ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',main_page =
7575
# Add article to zim file
7676
zim_creator.add_article(article)
7777

78+
79+
# Set mandatory metadata
80+
if not zim_creator.mandatory_metadata_ok:
81+
zim_creator.update_metadata(creator='python-libzim',description='Created in python',name='Hola',publisher='Monadical',title='Test Zim')
82+
7883
# Write article to zim file
7984
zim_creator.finalize()
8085

libzim/examples.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ def get_data(self):
5555
# Add article to zim file
5656
zim_creator.add_article(article)
5757

58+
# Set mandatory metadata
59+
if not zim_creator.mandatory_metadata_ok:
60+
zim_creator.update_metadata(creator='python-libzim',description='Created in python',name='Hola',publisher='Monadical',title='Test Zim')
61+
5862
# Write articles to zim file
5963
zim_creator.finalize()
6064

libzim/libzim.pyx

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,24 @@ cdef public api:
132132
# ZimCreator #
133133
#########################
134134

135-
#TODO Should we declare an article for metadata or left to the user managing ?
136-
135+
#TODO Write metadata
136+
137+
# Metadata keys that ZimCreator.mandatory_metadata_ok requires to be set.
MANDATORY_METADATA_KEYS = [
    "Name",
    "Title",
    "Creator",
    "Publisher",
    "Date",
    "Description",
    "Language",
]
# Optional keys (not enforced):
#   "LongDescription", "Licence", "Tags", "Flavour",
#   "Source", "Counter", "Scraper"
137153

138154
cdef class ZimCreator:
139155
"""
@@ -153,6 +169,10 @@ cdef class ZimCreator:
153169
Zim file Index language
154170
_min_chunk_size : str
155171
Zim file minimum chunk size
172+
_article_counter
173+
Zim file article counter
174+
_metadata
175+
Zim file metadata
156176
"""
157177

158178
cdef ZimCreatorWrapper *c_creator
@@ -162,15 +182,16 @@ cdef class ZimCreator:
162182
cdef object _index_language
163183
cdef object _min_chunk_size
164184
cdef object _article_counter
185+
cdef object _metadata
165186

166-
def __cinit__(self, str filename, str main_page = "", str index_language = "eng", min_chunk_size = 2048):
187+
def __init__(self, str filename, str main_page = "", str index_language = "eng", min_chunk_size = 2048):
167188
"""Constructs a ZimCreator from parameters.
168189
Parameters
169190
----------
170191
filename : str
171192
Zim file path
172193
main_page : str
173-
Zim file main_page
194+
Zim file main page
174195
index_language : str
175196
Zim file index language (default eng)
176197
min_chunk_size : int
@@ -183,8 +204,10 @@ cdef class ZimCreator:
183204
self._main_page = self.c_creator.getMainUrl().getLongUrl().decode("UTF-8", "strict")
184205
self._index_language = index_language
185206
self._min_chunk_size = min_chunk_size
207+
self._metadata = {k:None for k in MANDATORY_METADATA_KEYS}
186208

187209
self._article_counter = defaultdict(int)
210+
self.update_metadata(date=datetime.date.today(), language= index_language)
188211

189212

190213
@property
@@ -216,9 +239,38 @@ cdef class ZimCreator:
216239
"""Get the minimum chunk size of the ZimCreator object"""
217240
return self._min_chunk_size
218241

219-
def _update_article_counter(self, ZimArticle article):
242+
def get_article_counter_string(self):
    """Serialize the article counter into a single string.

    Returns
    -------
    str
        ``key=value`` pairs joined by ``;`` in the counter's iteration
        order; an empty string when the counter has no entries.
    """
    pairs = (
        "=".join((str(mime_type), str(count)))
        for mime_type, count in self._article_counter.items()
    )
    return ";".join(pairs)
244+
245+
def _get_metadata(self):
246+
metadata = self._metadata
247+
248+
counter_string = self.get_article_counter_string()
249+
if counter_string:
250+
metadata['Counter'] = counter_string
251+
252+
return metadata
253+
254+
@property
def mandatory_metadata_ok(self):
    """Flag if mandatory metadata is complete and not empty.

    Returns
    -------
    bool
        True only when every key listed in MANDATORY_METADATA_KEYS has a
        truthy value in the stored metadata.
    """
    # .get() so that an absent key is reported as incomplete metadata
    # instead of surfacing as a KeyError from a status check.
    return all(self._metadata.get(key) for key in MANDATORY_METADATA_KEYS)
259+
260+
def update_metadata(self, **kwargs):
    """Update the stored metadata from keyword arguments.

    Keyword names are converted from snake_case to PascalCase before being
    stored, e.g. ``long_description`` -> ``LongDescription``. A
    ``datetime.date`` supplied for the date entry is stored as a
    ``YYYY-MM-DD`` string; any other value is stored unchanged.

    Parameters
    ----------
    **kwargs
        Metadata entries to set or overwrite.
    """
    def pascalize(keyword):
        # "long_description" -> "Long_Description" -> "LongDescription"
        return "".join(keyword.title().split("_"))

    new_metadata = {pascalize(key): value for key, value in kwargs.items()}

    # Normalise after key conversion so every spelling that maps to "Date"
    # (``date=``, ``Date=``) gets formatted, not only the lowercase one.
    date_value = new_metadata.get("Date")
    if isinstance(date_value, datetime.date):
        new_metadata["Date"] = date_value.strftime('%Y-%m-%d')

    self._metadata.update(new_metadata)
270+
271+
def _update_article_counter(self, ZimArticle article not None):
220272
# default dict update
221-
self._article_counter[article.mimetype] += 1
273+
self._article_counter[article.get_mime_type().strip()] += 1
222274

223275
def add_article(self, ZimArticle article not None):
224276
"""Add a ZimArticle to the Creator object.
@@ -247,7 +299,7 @@ cdef class ZimCreator:
247299
except:
248300
raise
249301
else:
250-
if not article.is_redirect:
302+
if not article.is_redirect():
251303
self._update_article_counter(article)
252304

253305
def finalize(self):
@@ -257,9 +309,13 @@ cdef class ZimCreator:
257309
------
258310
RuntimeError
259311
If the ZimCreator was already finalized
312+
RuntimeError
313+
If mandatory metadata is missing
260314
"""
261-
262315
if not self._finalized:
316+
if not self.mandatory_metadata_ok:
317+
raise RuntimeError("Mandatory metadata missing")
318+
263319
self.c_creator.finalize()
264320
self._finalized = True
265321
else:

0 commit comments

Comments
 (0)