1- import pytest
1+ import gc
22from pathlib import Path
33
4- DATA_DIR = Path ( __file__ ). parent
4+ import pytest
55
66from libzim .reader import File
77
# Directory containing this test module; the ZIM archive path below is built from it.
DATA_DIR = Path(__file__).parent


# One entry per ZIM archive under test. Each entry is handed to the tests as a
# fixture parameter, and every key is compared against the attribute of the
# same name on the opened reader.
ZIMFILES = [
    dict(
        filename=str(DATA_DIR / "wikipedia_es_physics_mini.zim"),
        checksum=u"99ea7a5598c6040c4f50b8ac0653b703",
        namespaces=u"-AIMX",
        article_count=22027,
        main_page_url=u"A/index",
    ),
]
20+
21+
22+
23+
@pytest.fixture(params=ZIMFILES)
def zimdata(request):
    """Return the expected-metadata dict for the ZIMFILES entry being tested."""
    expected_metadata = request.param
    return expected_metadata
27+
@pytest.fixture
def reader(zimdata):
    """Open the ZIM archive whose path is stored under ``zimdata['filename']``."""
    zim_path = zimdata['filename']
    return File(zim_path)
2031
2132
2233@pytest .fixture
@@ -25,45 +36,71 @@ def article_data():
2536 'url' : u"A/Albert_Einstein" ,
2637 'title' : u"Albert Einstein" ,
2738 'mimetype' :u"text/html" ,
28- 'article_id' : 663
39+ 'article_id' : 663 ,
40+ 'size' : 17343
2941 }
3042
3143
32- def test_zim_filename (reader_data ):
33- reader , data = reader_data
34- for k , v in data .items ():
def test_zim_filename(reader, zimdata):
    """Check every expected metadata value against the reader attribute of the same name."""
    for attr_name in zimdata:
        expected = zimdata[attr_name]
        assert getattr(reader, attr_name) == expected
3647
37- def test_zim_read (reader_data , article_data ):
38- reader , _ = reader_data
def test_zim_read(reader, article_data):
    """Fetch an article by its long URL and verify its metadata and content buffer."""
    long_url = article_data['url']
    article = reader.get_article(long_url)

    assert article.longurl == long_url
    assert article.title == article_data['title']
    # ``url`` is expected to be the long URL minus its first two characters.
    assert article.url == long_url[2:]
    assert article.mimetype == article_data['mimetype']
    # The content must be exposed as a memoryview of the expected length.
    assert isinstance(article.content, memoryview)
    assert len(article.content) == article_data['size']
4557
46- def test_get_article_by_id (reader_data , article_data ):
47- reader , _ = reader_data
def test_content_ref_keep(reader):
    """Keep a content memoryview alive after losing the reference to its article.

    The view is obtained inside a helper so the owning article goes out of
    scope. Many other articles are then loaded to try to expose a dangling
    pointer behind the retained view.
    """
    def fetch_content():
        # The article is local to this helper; only its content escapes.
        article = reader.get_article(u"A/Albert_Einstein")
        assert isinstance(article.content, memoryview)
        return article.content

    content = fetch_content()  # Now we have a content but no reference to the article.
    gc.collect()

    # Load a lot of content: every second entry, skipping redirects.
    for article_id in range(0, reader.article_count, 2):
        other = reader.get_article_by_id(article_id)
        if not other.is_redirect:
            _ = other.content

    # Check everything is ok: the retained view must still expose the original bytes.
    assert len(content) == 17343
    expected_prefix = b'<!DOCTYPE html>\n <html class="client-js"><head>\n <meta charset="UTF-8">\n <title>Albert Einstein</ti'
    # Take the slice length from the literal itself so the compared slice and
    # the expected bytes can never disagree on length.
    assert bytes(content[:len(expected_prefix)]) == expected_prefix
78+
@pytest.mark.skip(reason="body was short-circuited by a bare `return`; re-enable after review")
def test_get_article_by_id(reader, article_data):
    """Fetch an article by its numeric id and verify its metadata.

    NOTE(review): this test used to start with an unconditional ``return``,
    so it passed without asserting anything. It is now reported as skipped
    so the gap is visible in test reports; remove the marker to run the
    assertions below.
    """
    article = reader.get_article_by_id(article_data['article_id'])

    assert article.longurl == article_data['url']
    assert article.title == article_data['title']
    # ``url`` is expected to be the long URL minus its first two characters.
    assert article.url == article_data['url'][2:]
    assert article.mimetype == article_data['mimetype']
5487
55- def test_namespace_count (reader_data ):
56- reader , _ = reader_data
def test_namespace_count(reader):
    """The per-namespace counts must add up to the reader's article count."""
    total = 0
    for namespace in reader.namespaces:
        total += reader.get_namespaces_count(namespace)
    assert reader.article_count == total
6092
61- def test_suggest (reader_data ):
62- reader , _ = reader_data
def test_suggest(reader):
    """Suggestions for "Einstein" must include the Albert Einstein article URL."""
    suggestions = list(reader.suggest(u"Einstein"))
    assert u"A/Albert_Einstein" in suggestions
6596
66- def test_search (reader_data ):
67- reader , _ = reader_data
def test_search(reader):
    """A search for "Einstein" must yield exactly ten results."""
    hits = list(reader.search(u"Einstein"))
    assert len(hits) == 10
100+
101+
def test_get_wrong_article(reader):
    """Looking up a missing article, by id or by URL, must raise RuntimeError."""
    # An id past the end of the archive.
    with pytest.raises(RuntimeError):
        reader.get_article_by_id(reader.article_count + 100)

    # A URL that the test expects to be absent from the archive.
    missing_url = "A/I_do_not_exists"
    with pytest.raises(RuntimeError):
        reader.get_article(missing_url)
0 commit comments