Skip to content

Commit 10305bb

Browse files
jdcaballerov authored and mgautierfr committed
Refactor libzim
1 parent 2cc7556 commit 10305bb

File tree

13 files changed

+400
-655
lines changed

13 files changed

+400
-655
lines changed

README.md

Lines changed: 46 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ docker-compose run libzim /bin/bash
77
```
88
```bash
99
python setup.py build_ext -i
10-
python tests/test_pyzim.py
10+
python tests/test_libzim.py
1111

1212
# or
1313

@@ -18,38 +18,63 @@ python tests/test_pyzim.py
1818
Example:
1919

2020
```python3
21-
import pyzim
21+
from libzim import ZimArticle, ZimBlob, ZimCreator
2222

23-
zim_file_path = "/opt/python-libzim/tests/wikipedia_es_physics_mini.zim"
24-
zim_reader = pyzim.ZimReader(zim_file_path)
23+
class ZimTestArticle(ZimArticle):
24+
content = '''<!DOCTYPE html>
25+
<html class="client-js">
26+
<head><meta charset="UTF-8">
27+
<title>Monadical</title>
28+
<h1> ñññ Hello, it works ñññ </h1></html>'''
2529

26-
article = pyzim.ZimArticle()
30+
def __init__(self):
31+
ZimArticle.__init__(self)
2732

28-
# article content
33+
def is_redirect(self):
34+
return False
2935

30-
article_title = "Monadical SAS"
31-
article_url = "Monadical_SAS"
32-
article_longurl ="A/Monadical_SAS"
33-
article_mimetype = "text/html"
34-
article_content = '''<!DOCTYPE html> <html class="client-js"><head><meta charset="UTF-8">
35-
<title>Monadical SAS</title> <h1> Hello, it works Monadical ñññ </h1></html>'''
36+
@property
37+
def can_write(self):
38+
return True
3639

37-
article.title = article_title
38-
article.url = article_url
39-
article.mimetype = article_mimetype
40-
article.content = article_content
40+
def get_url(self):
41+
return "A/Monadical_SAS"
4142

43+
def get_title(self):
44+
return "Monadical SAS"
45+
46+
def get_mime_type(self):
47+
return "text/html"
48+
49+
def get_filename(self):
50+
return ""
51+
52+
def should_compress(self):
53+
return True
4254

43-
import uuid
55+
def should_index(self):
56+
return True
57+
58+
def get_data(self):
59+
return ZimBlob(self.content.encode('UTF-8'))
60+
61+
# Create a ZimTestArticle article
62+
63+
article = ZimTestArticle()
64+
print(article.content)
4465

66+
# Write the articles
67+
import uuid
4568
rnd_str = str(uuid.uuid1())
69+
4670
test_zim_file_path = "/opt/python-libzim/tests/kiwix-test"
47-
zim_creator = pyzim.ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',"welcome","spa",2048)
4871

72+
zim_creator = ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',main_page = "welcome",index_language= "eng", min_chunk_size= 2048)
4973

74+
# Add article to zim file
75+
zim_creator.add_article(article)
5076

77+
# Write article to zim file
78+
zim_creator.finalize()
5179

52-
# Add and write article to second test zim file
53-
zim_creator.add_article(article)
54-
zim_creator.finalise()
5580
```

libzim/examples.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from libzim import ZimArticle, ZimBlob, ZimCreator
2+
3+
class ZimTestArticle(ZimArticle):
4+
content = '''<!DOCTYPE html>
5+
<html class="client-js">
6+
<head><meta charset="UTF-8">
7+
<title>Monadical</title>
8+
<h1> ñññ Hello, it works ñññ </h1></html>'''
9+
10+
def __init__(self):
11+
ZimArticle.__init__(self)
12+
13+
def is_redirect(self):
14+
return False
15+
16+
@property
17+
def can_write(self):
18+
return True
19+
20+
def get_url(self):
21+
return "A/Monadical_SAS"
22+
23+
def get_title(self):
24+
return "Monadical SAS"
25+
26+
def get_mime_type(self):
27+
return "text/html"
28+
29+
def get_filename(self):
30+
return ""
31+
32+
def should_compress(self):
33+
return True
34+
35+
def should_index(self):
36+
return True
37+
38+
def get_data(self):
39+
return ZimBlob(self.content.encode('UTF-8'))
40+
41+
# Create a ZimTestArticle article
42+
43+
article = ZimTestArticle()
44+
print(article.content)
45+
46+
# Write the article
47+
import uuid
48+
rnd_str = str(uuid.uuid1())
49+
50+
test_zim_file_path = "/opt/python-libzim/tests/kiwix-test"
51+
52+
zim_creator = ZimCreator(test_zim_file_path + '-' + rnd_str + '.zim',main_page = "welcome",index_language= "eng", min_chunk_size= 2048)
53+
54+
# Add article to zim file
55+
zim_creator.add_article(article)
56+
57+
# Write articles to zim file
58+
zim_creator.finalize()
59+

pyzim/lib.cxx renamed to libzim/lib.cxx

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <Python.h>
22
#include "lib.h"
33

4-
#include "pyzim_api.h"
4+
#include "libzim_api.h"
55

66
#include <iostream>
77
#include <zim/writer/url.h>
@@ -16,10 +16,10 @@
1616

1717
ZimArticleWrapper::ZimArticleWrapper(PyObject *obj) : m_obj(obj)
1818
{
19-
if (import_pyzim())
19+
if (import_libzim())
2020
{
21-
std::cerr << "Error executing import_pyzim!\n";
22-
throw std::runtime_error("Error executing import_pyzim");
21+
std::cerr << "Error executing import_libzim!\n";
22+
throw std::runtime_error("Error executing import_libzim");
2323
}
2424
else
2525
{
@@ -189,6 +189,11 @@ class OverriddenZimCreator : public zim::writer::Creator
189189
return zim::writer::Url('A', mainPage);
190190
}
191191

192+
void setMainUrl(std::string newUrl)
193+
{
194+
mainPage = newUrl;
195+
}
196+
192197
std::string mainPage;
193198
};
194199

@@ -208,7 +213,7 @@ ZimCreatorWrapper::
208213
{
209214
bool shouldIndex = !fullTextIndexLanguage.empty();
210215

211-
OverriddenZimCreator *c = new OverriddenZimCreator(mainPage); // TODO: consider when to delete this
216+
OverriddenZimCreator *c = new OverriddenZimCreator(mainPage);
212217
c->setIndexing(shouldIndex, fullTextIndexLanguage);
213218
c->setMinChunkSize(minChunkSize);
214219
c->startZimCreation(fileName);
@@ -220,8 +225,18 @@ void ZimCreatorWrapper::addArticle(std::shared_ptr<ZimArticleWrapper> article)
220225
_creator->addArticle(article);
221226
}
222227

223-
void ZimCreatorWrapper::finalise()
228+
void ZimCreatorWrapper::finalize()
224229
{
225230
_creator->finishZimCreation();
226231
delete this;
232+
}
233+
234+
void ZimCreatorWrapper::setMainUrl(std::string newUrl)
235+
{
236+
_creator->setMainUrl(newUrl);
237+
}
238+
239+
zim::writer::Url ZimCreatorWrapper::getMainUrl()
240+
{
241+
return _creator->getMainUrl();
227242
}

pyzim/lib.h renamed to libzim/lib.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// -*- c++ -*-
2-
#ifndef PYZIM_LIB_H
3-
#define PYZIM_LIB_H 1
2+
#ifndef libzim_LIB_H
3+
#define libzim_LIB_H 1
44

55
struct _object;
66
typedef _object PyObject;
@@ -53,7 +53,9 @@ class ZimCreatorWrapper
5353
~ZimCreatorWrapper();
5454
static ZimCreatorWrapper *create(std::string fileName, std::string mainPage, std::string fullTextIndexLanguage, int minChunkSize);
5555
void addArticle(std::shared_ptr<ZimArticleWrapper> article);
56-
void finalise();
56+
void finalize();
57+
void setMainUrl(std::string newUrl);
58+
zim::writer::Url getMainUrl();
5759
};
5860

59-
#endif // !PYZIM_LIB_H
61+
#endif // !libzim_LIB_H

libzim/libzim.pxd

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from libcpp.string cimport string
2+
from libc.stdint cimport uint32_t, uint64_t
3+
from libcpp cimport bool
4+
from libcpp.memory cimport shared_ptr
5+
from libcpp.vector cimport vector
6+
7+
from cpython.ref cimport PyObject
8+
9+
cdef extern from "zim/blob.h" namespace "zim":
10+
cdef cppclass Blob:
11+
Blob() except +
12+
Blob(const char* data, uint64_t size) except +
13+
const char* data() except +
14+
const char* end() except +
15+
uint64_t size() except +
16+
17+
cdef extern from "zim/writer/url.h" namespace "zim::writer":
18+
cdef cppclass Url:
19+
string getLongUrl() except +
20+
21+
22+
cdef extern from "zim/writer/article.h" namespace "zim::writer":
23+
cdef cppclass Article:
24+
const string getTitle() except +
25+
26+
27+
cdef extern from "lib.h":
28+
cdef cppclass ZimArticleWrapper(Article):
29+
ZimArticleWrapper(PyObject *obj) except +
30+
const Url getUrl() except +
31+
const string getTitle() except +
32+
const bool isRedirect() except +
33+
const string getMimeType() except +
34+
const string getFilename() except +
35+
const bool shouldCompress() except +
36+
const bool shouldIndex() except +
37+
const Url getRedirectUrl() except +
38+
const Blob getData() except +
39+
40+
cdef cppclass ZimCreatorWrapper:
41+
@staticmethod
42+
ZimCreatorWrapper *create(string fileName, string mainPage, string fullTextIndexLanguage, int minChunkSize) except +
43+
void addArticle(shared_ptr[ZimArticleWrapper] article) except +
44+
void finalize() except +
45+
Url getMainUrl() except +
46+
void setMainUrl(string) except +

0 commit comments

Comments
 (0)