Skip to content

Commit a922383

Browse files
jdcaballerov authored and mgautierfr committed
Refactor to use Cython properties
1 parent b7e6d02 commit a922383

File tree

5 files changed

+84
-242
lines changed

5 files changed

+84
-242
lines changed

pyzim/examples.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<h1> Hola Funciona </h1></html>'''
88

99

10-
article = pyzim.ZimArticle(ns='A', url = 'Monadical', title='Monadical SAS', content=content.encode(), should_index = True)
10+
article = pyzim.ZimArticle(namespace='A', url = 'Monadical', title='Monadical SAS', content=content, should_index = True)
1111

1212
import uuid
1313

@@ -17,6 +17,7 @@
1717

1818
zim_creator = pyzim.ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',"welcome","eng",2048)
1919
zim_creator.add_article(article)
20+
zim_creator.finalise()
2021

2122
test_zim_reader = pyzim.ZimReader(test_zim_file_path + '-' + rnd_str + '.zim')
2223

pyzim/pyzim.pyx

Lines changed: 45 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from libcpp.string cimport string
2-
from libc.stdint cimport uint32_t, uint64_t
32
from libcpp cimport bool
43
from libcpp.memory cimport shared_ptr, make_shared
54

@@ -24,25 +23,28 @@ cdef class ZimArticle:
2423
----------
2524
*c_zim_article : zim.ZimArticle
2625
a pointer to the C++ article object
27-
_namespace : str
26+
_can_write : bool
27+
flag if the article is ready for writing
28+
29+
Properties
30+
-----------
31+
namespace: str
2832
the article namespace
29-
_title : str
33+
title : str
3034
the article title
31-
_content : str
35+
content : str
3236
the article content
33-
_longurl : str
37+
longurl : str
3438
the article long url, i.e. {NAMESPACE}/{url}
35-
_url : str
39+
url : str
3640
the article url
37-
_mimetype : str
41+
mimetype : str
3842
the article mimetype
39-
_is_redirect : bool
43+
is_redirect : bool
4044
flag if the article is a redirect
41-
_can_write : bool
42-
flag if the article is ready for writing
43-
_redirect_longurl: str
45+
redirect_longurl: str
4446
the long redirect url i.e {NAMESPACE}/{redirect_url}
45-
_redirect_url : str
47+
redirect_url : str
4648
the article redirect url
4749
4850
Methods
@@ -53,40 +55,27 @@ cdef class ZimArticle:
5355
5456
"""
5557
cdef zim.ZimArticle *c_zim_article
56-
cdef object _namespace
57-
cdef object _title
58-
cdef object _content
59-
cdef object _longurl
60-
cdef object _url
61-
cdef object _mimetype
62-
cdef object _is_redirect
6358
cdef object _can_write
64-
cdef object _redirect_longurl
65-
cdef object _redirect_url
6659

6760
VALID_NAMESPACES = ["-","A","B","I","J","M","U","V","W","X"]
6861

6962

70-
def __cinit__(self, url="", str content="", namespace= "A", mimetype= "text/html", title="", redirect_article_url= "", article_id = "",filename="", should_index=True ):
63+
def __cinit__(self, url="", str content="", namespace= "A", mimetype= "text/html", title="", redirect_article_url= "",filename="", should_index=True ):
7164

7265
# Encoding must be set to UTF-8
73-
cdef bytes py_bytes = content.encode(encoding='UTF-8')
74-
cdef char* c_string = py_bytes
66+
#cdef bytes py_bytes = content.encode(encoding='UTF-8')
67+
#cdef char* c_string = py_bytes
7568

7669
if namespace not in self.VALID_NAMESPACES:
7770
raise RuntimeError("Invalid Namespace")
7871

7972
c_zim_art = new zim.ZimArticle(ord(namespace), # Namespace
80-
article_id.encode('UTF-8'), # Article index
8173
url.encode('UTF-8'), # url
8274
title.encode('UTF-8'), # title
8375
mimetype.encode('UTF-8'), # mimeType
8476
redirect_article_url.encode('UTF-8'),# redirectUrl
85-
filename.encode('UTF-8'), # filename
86-
True, # shouldIndex
87-
c_string, # data buffer
88-
len(c_string)) # data buffer length
89-
77+
should_index, # shouldIndex
78+
content.encode('UTF-8'))
9079
self.__setup(c_zim_art)
9180

9281
def __dealloc__(self):
@@ -104,95 +93,31 @@ cdef class ZimArticle:
10493
A python ZimArticle always maintains a pointer to a wrapped zim.ZimArticle C++ object.
10594
The python object reflects the state, accessible with properties, of a wrapped C++ zim.ZimArticle,
10695
this ensures a valid wrapped article that can be passed to a zim.ZimCreator.
107-
10896
10997
Parameters
11098
----------
11199
*art : zim.ZimArticle
112100
Pointer to a C++ article object
113101
114102
"""
115-
116-
# Delete old internal C zim.ZimArticle article if any
117-
if self.c_zim_article != NULL:
118-
del self.c_zim_article
119-
120103
# Set new internal C zim.ZimArticle article
121104
self.c_zim_article = art
122105

123-
# Setup members
124-
self._title = self.c_zim_article.getTitle().decode("UTF-8", "strict")
125-
126-
b = self.c_zim_article.getData()
127-
#print("****************** B.DATA() de %s ***********************" % self._title)
128-
#print(b.data())
129-
self._content = b.data()[:b.size()].decode("UTF-8", "strict")
130-
131-
self._longurl = self.c_zim_article.getUrl().getLongUrl().decode("UTF-8", "strict")
132-
self._url = self._longurl[2:]
133-
self._namespace = self._longurl[0]
134-
135-
self._redirect_longurl = self.c_zim_article.getRedirectUrl().getLongUrl().decode("UTF-8", "strict")
136-
self._redirect_url = self._redirect_longurl[2:]
137-
138-
self._mimetype = self.c_zim_article.getMimeType().decode("UTF-8", "strict")
139-
self._is_redirect = self.c_zim_article.isRedirect()
140-
141106
# An article must have at least non empty url to be writable
142107
# Content can be empty if article is a redirect
143-
if self._longurl.strip() and (self._content.strip() or self._is_redirect):
108+
if self.longurl and (self.content or self.is_redirect):
144109
self._can_write = True
145110
else:
146111
self._can_write = False
147112

148113
def get_article_properties(self):
149114
return dict((name, getattr(self, name)) for name in dir(self) if not name.startswith('__') )
150115

151-
# props is a dictionary, Cython cdef can't use **kwargs
152-
cdef update_c_zim_article_from_properties(self, dict props):
153-
154-
# Encoding must be set to UTF-8
155-
cdef bytes py_bytes = props.get("content",u"").encode()
156-
cdef char* c_string = py_bytes
157-
158-
ns = ord(props["namespace"])
159-
article_id = props.get("article_id","")
160-
url = props["url"]
161-
title = props["title"]
162-
mimetype = props["mimetype"]
163-
redirect_article_url = props.get("redirect_url")
164-
filename = props.get("filename","")
165-
should_index = props.get("should_index",True)
166-
167-
168-
c_zim_art = new zim.ZimArticle(ns, # Namespace
169-
"".encode('UTF-8'), # Article index
170-
url.encode('UTF-8'), # url
171-
title.encode('UTF-8'), # title
172-
mimetype.encode('UTF-8'), # mimeType
173-
redirect_article_url.encode('UTF-8'),# redirectAid
174-
"".encode('UTF-8'), # filename
175-
should_index, # shouldIndex
176-
c_string, # data buffer
177-
len(c_string)) # data buffer lengt
178-
179-
180-
self.__setup(c_zim_art)
181-
182-
def _update_property(self, **kwargs):
183-
properties = self.get_article_properties()
184-
properties.update(kwargs)
185-
186-
self.update_c_zim_article_from_properties(properties)
187-
188116
# Factory functions - Currently Cython can't use classmethods
189117
@staticmethod
190118
cdef from_read_article(zim.Article art):
191119
"""Creates a python ZimArticle from a C++ zim.Article article.
192-
193-
Retu
194120
195-
196121
Parameters
197122
----------
198123
art : zim.ZimArticle
@@ -208,98 +133,89 @@ cdef class ZimArticle:
208133
@property
209134
def namespace(self):
210135
"""Get the article's namespace"""
211-
return self._namespace
136+
return chr(self.c_zim_article.ns)
212137

213138
@namespace.setter
214139
def namespace(self,new_namespace):
215140
"""Set the article's namespace"""
216-
self._update_property(namespace=new_namespace)
217-
141+
if new_namespace not in self.VALID_NAMESPACES:
142+
raise RuntimeError("Invalid Namespace")
143+
self.c_zim_article.ns = ord(new_namespace[0])
144+
218145
@property
219146
def title(self):
220147
"""Get the article's title"""
221-
return self._title
148+
return self.c_zim_article.title.decode('UTF-8')
222149

223150
@title.setter
224151
def title(self, new_title):
225152
"""Set the article's title"""
226-
self._update_property(title=new_title)
153+
self.c_zim_article.title = new_title.encode('UTF-8')
227154

228155
@property
229156
def content(self):
230157
"""Get the article's content"""
231-
return self._content
158+
return self.c_zim_article.content.decode('UTF-8')
232159

233160
@content.setter
234161
def content(self, new_content):
235162
"""Set the article's content"""
236-
self._update_property(content=new_content)
163+
self.c_zim_article.content = new_content.encode('UTF-8')
237164

238165
@property
239166
def longurl(self):
240167
"""Get the article's long url i.e {NAMESPACE}/{url}"""
241-
return self._longurl
168+
return self.c_zim_article.getUrl().getLongUrl().decode("UTF-8", "strict")
242169

243170
@property
244171
def url(self):
245172
"""Get the article's url"""
246-
return self._url
173+
return self.c_zim_article.url.decode('UTF-8')
247174

248175
@url.setter
249176
def url(self, new_url):
250177
"""Set the article's url"""
251-
self._update_property(url=new_url)
178+
self.c_zim_article.url = new_url.encode('UTF-8')
252179

253180
@property
254181
def redirect_longurl(self):
255182
"""Get the article's redirect long url i.e {NAMESPACE}/{redirect_url}"""
256-
return self._redirect_longurl
183+
return self.c_zim_article.getRedirectUrl().getLongUrl().decode("UTF-8", "strict")
257184

258185
@property
259186
def redirect_url(self):
260187
"""Get the article's redirect url"""
261-
return self._redirect_url
188+
return self.c_zim_article.redirectUrl.decode('UTF-8')
262189

263190
@redirect_url.setter
264191
def redirect_url(self, new_redirect_url):
265192
"""Set the article's redirect url"""
266-
self._update_property(redirect_url=new_redirect_url)
193+
self.c_zim_article.redirectUrl = new_redirect_url.encode('UTF-8')
267194

268195
@property
269196
def mimetype(self):
270197
"""Get the article's mimetype"""
271-
return self._mimetype
198+
return self.c_zim_article.mimeType.decode('UTF-8')
272199

273200
@mimetype.setter
274201
def mimetype(self, new_mimetype):
275202
"""Set the article's mimetype"""
276-
self._update_property(mimetype=new_mimetype)
203+
self.c_zim_article.mimeType = new_mimetype.encode('UTF-8')
277204

278205
@property
279206
def is_redirect(self):
280207
"""Get if the article is a redirect"""
281-
return self._is_redirect
208+
return self.c_zim_article.isRedirect()
282209

283210
@property
284211
def can_write(self):
285212
"""Get if the article is valid for writing"""
213+
if self.longurl and (self.content or self.is_redirect):
214+
self._can_write = True
215+
else:
216+
self._can_write = False
286217
return self._can_write
287218

288-
# ZimArticle.good only available for zim:Article
289-
#def _good(self):
290-
# return self.c_zim_article.good()
291-
292-
# ZimArticle.getRedirectArticle only available for zim:Article
293-
#def get_redirect_article(self):
294-
# cdef Article article = Article()
295-
# cdef zim.Article art = self.c_article.getRedirectArticle()
296-
# if not art.good():
297-
# raise RuntimeError("Article is not a redirectArticle")
298-
# article.setup(art)
299-
# return article
300-
301-
302-
303219
#########################
304220
# ZimReader #
305221
#########################
@@ -427,7 +343,7 @@ cdef class ZimCreator:
427343
if not article.can_write:
428344
raise RuntimeError("Article is not good for writing")
429345

430-
# Make a shared pointer to ZimArticle from the ZimArticle object (deref internal c_zim_article)
346+
# Make a shared pointer to ZimArticle from the ZimArticle object (dereference internal c_zim_article)
431347
cdef shared_ptr[zim.ZimArticle] art = make_shared[zim.ZimArticle](dereference(article.c_zim_article));
432348
try:
433349
self.c_creator.addArticle(art)
@@ -470,31 +386,10 @@ cdef class ZimCreator:
470386
new_metadata = {pascalize(key): value for key, value in kwargs.items()}
471387
self._metadata.update(new_metadata)
472388

473-
def add_art(self, str out_content):
474-
475-
cdef bytes py_bytes = out_content.encode()
476-
cdef char* c_string = py_bytes
477-
478-
c_zim_art = new zim.ZimArticle(ord("A"), # NS
479-
"".encode(), # aid
480-
"Hola".encode(), # url
481-
"JDC HOLA".encode(), # title
482-
"text/html".encode(), # mimeType
483-
"".encode(), # redirectAid
484-
"".encode(), # filename
485-
True, # shouldIndex
486-
c_string, # data buffer
487-
len(c_string)) # data buffer length
488-
# print(c_zim_art.getTitle())
489-
#b = c_zim_art.getData()
490-
#print(b.data()[:b.size()])
491-
cdef shared_ptr[zim.ZimArticle] art = make_shared[zim.ZimArticle](dereference(c_zim_art));
492-
self.c_creator.addArticle(art)
493-
494389
def finalise(self):
495390
if not self._finalised:
496-
#TODO uncomment after debug
497391
self._write_metadata(self.get_metadata())
498392
self.c_creator.finalise()
393+
self._finalised = True
499394
else:
500-
raise RuntimeError("ZimCreator was already finalised")
395+
raise RuntimeError("ZimCreator already finalised")

0 commit comments

Comments
 (0)