Skip to content

Commit 15593f6

Browse files
committed
Re-add the Search API.
The suggestion API is still to be implemented.
1 parent 4ef4344 commit 15593f6

File tree

5 files changed

+166
-87
lines changed

5 files changed

+166
-87
lines changed

libzim/libwrapper.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <zim/item.h>
2929
#include <zim/writer/item.h>
3030
#include <zim/writer/contentProvider.h>
31+
#include <zim/search.h>
3132

3233
struct _object;
3334
typedef _object PyObject;
@@ -137,6 +138,7 @@ class WArchive : public Wrapper<zim::Archive>
137138
WArchive() = default;
138139
WArchive(const std::string& filename) : Wrapper(zim::Archive(filename)) {};
139140
WArchive(const zim::Archive& o) : Wrapper(o) {};
141+
// Hands out the wrapped zim::Archive itself by dereferencing mp_base.
// WSearcher's constructor uses this (`*a`) to build a zim::Searcher.
zim::Archive& operator*() const
{
  return *mp_base;
}
140142

141143
FORWARD(WEntry, getEntryByPath)
142144
FORWARD(WEntry, getEntryByTitle)
@@ -167,6 +169,39 @@ class WArchive : public Wrapper<zim::Archive>
167169
FORWARD(bool, check)
168170
};
169171

172+
// Wrapper around zim::SearchResultSet.
// The members below are produced by the FORWARD macro (#undef'ed after the
// wrapper classes); presumably each one delegates to the same-named method of
// the wrapped object and returns the given type - confirm against the macro.
class WSearchResultSet : public Wrapper<zim::SearchResultSet>
{
  public:
    WSearchResultSet() = default;

    // Left implicit on purpose: code returning a zim::SearchResultSet where a
    // WSearchResultSet is expected may depend on this conversion.
    WSearchResultSet(const zim::SearchResultSet& o)
      : Wrapper(o)
    {}

    FORWARD(zim::SearchIterator, begin)
    FORWARD(zim::SearchIterator, end)
    FORWARD(int, size)
};
183+
184+
// Wrapper around zim::Search.
// Only built by moving from a zim::Search rvalue; no copying constructor from
// zim::Search is declared here.
class WSearch : public Wrapper<zim::Search>
{
  public:
    WSearch() = default;

    WSearch(zim::Search&& search)
      : Wrapper(std::move(search))
    {}

    // FORWARD-generated members (see the macro definition for the exact
    // signatures); getResults is declared to return the wrapper type.
    FORWARD(int, getEstimatedMatches)
    FORWARD(WSearchResultSet, getResults)
};
193+
194+
// Wrapper around zim::Searcher.
class WSearcher : public Wrapper<zim::Searcher>
{
  public:
    WSearcher() = default;

    // Builds a zim::Searcher on the zim::Archive held by `archive`
    // (obtained through WArchive::operator*).
    WSearcher(const WArchive& archive)
      : Wrapper(zim::Searcher(*archive))
    {}

    WSearcher(const zim::Searcher& searcher)
      : Wrapper(searcher)
    {}

    // FORWARD-generated members; search is declared to return the WSearch
    // wrapper rather than a bare zim::Search.
    FORWARD(void, setVerbose)
    FORWARD(WSearch, search)
};
204+
170205
#undef FORWARD
171206

172207

libzim/searcher.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
""" libzim searcher module

    - Searcher to run search queries on an opened ZIM Archive
      - `Query` holds the string to search for
      - `Searcher.search()` gives a search whose results can be fetched

    Usage:

    query = Query()
    query.set_query("text to look for")
    searcher = Searcher(zim)  # zim is an opened libzim.reader.Archive
    search = searcher.search(query)
    print(f"About {search.getEstimatedMatches()} matches")
    for path in search.getResults(0, 10):
        print(path)
"""

# flake8: noqa
# `PyQuery` is the name of the class inside the compiled wrapper; it is
# re-exported here under its public name `Query`.
from .wrapper import Searcher, PyQuery as Query


__all__ = ["Searcher", "Query"]

libzim/wrapper.pxd

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,21 @@ cdef extern from "libwrapper.h":
7676
cdef cppclass WriterItemWrapper:
7777
WriterItemWrapper(PyObject* obj) except +
7878

79+
# zim::Query is used directly from libzim's own header (no wrapper class).
cdef extern from "zim/search.h" namespace "zim":
    cdef cppclass Query:
        Query()
        # Both setters are declared as returning a reference to a Query.
        Query& setQuery(string query)
        Query& setGeorange(float latitude, float longitude, float distance)
84+
85+
86+
# zim::SearchIterator is declared before the libwrapper.h block below so that
# the wrapper declarations there can name it as a return type.
cdef extern from "zim/search_iterator.h" namespace "zim":
    cdef cppclass SearchIterator:
        SearchIterator()
        # Increment and comparisons, as needed for a begin/end loop.
        SearchIterator operator++()
        bint operator==(SearchIterator)
        bint operator!=(SearchIterator)
        # Accessors for the result the iterator currently points at.
        string getPath()
        string getTitle()
7994

8095
# Import the cpp wrappers.
8196
cdef extern from "libwrapper.h":
@@ -145,11 +160,17 @@ cdef extern from "libwrapper.h":
145160
bool hasChecksum() except +
146161
bool check() except +
147162

148-
cdef extern from "zim/search_iterator.h" namespace "zim":
149-
cdef cppclass SearchIterator:
150-
SearchIterator()
151-
SearchIterator operator++()
152-
bint operator==(search_iterator)
153-
bint operator!=(search_iterator)
154-
string getPath()
155-
string getTitle()
163+
# Cython view of the WSearcher wrapper class from libwrapper.h.
cdef cppclass WSearcher:
    WSearcher()
    WSearcher(const WArchive& archive) except +
    # The return type is spelled out: a declaration without one is taken by
    # Cython to return a Python object, while the C++ side forwards setVerbose
    # as returning void.
    void setVerbose(bool verbose)
    WSearch search(Query query) except +
168+
169+
# Cython view of the WSearch wrapper class from libwrapper.h.
# `except +` turns a C++ exception raised by the call into a Python exception.
cdef cppclass WSearch:
    int getEstimatedMatches() except +
    WSearchResultSet getResults(int start, int count) except +
172+
173+
# Cython view of the WSearchResultSet wrapper class from libwrapper.h.
# begin()/end() give the zim::SearchIterator pair used to walk the results.
cdef cppclass WSearchResultSet:
    SearchIterator begin()
    SearchIterator end()
    int size()

libzim/wrapper.pyx

Lines changed: 65 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ from typing import Generator
2727
from cython.operator import dereference, preincrement
2828
from cpython.ref cimport PyObject
2929
from cpython.buffer cimport PyBUF_WRITABLE
30+
from cython.operator import preincrement
3031

3132
from libc.stdint cimport uint64_t
3233
from libcpp.string cimport string
@@ -647,92 +648,85 @@ cdef class PyArchive:
647648
except RuntimeError as e:
648649
raise KeyError(str(e))
649650

650-
def suggest(self, query: str, start: int = 0, end: int = 10) -> Generator[str, None, None]:
651-
""" Paths of suggested entries in the archive from a title query -> Generator[str, None, None]
651+
def __repr__(self) -> str:
652+
return f"{self.__class__.__name__}(filename={self.filename})"
652653

653-
Parameters
654-
----------
655-
query : str
656-
Title query string
657-
start : int
658-
Iterator start (default 0)
659-
end : end
660-
Iterator end (default 10)
661-
Returns
662-
-------
663-
Generator
664-
Path of suggested entry """
665-
# cdef wrapper.ZimSearch search = wrapper.ZimSearch(dereference(self.c_archive))
666-
# search.set_suggestion_mode(True)
667-
# search.set_query(query.encode('UTF-8'))
668-
# search.set_range(start, end)
669654

670-
# cdef wrapper.search_iterator it = search.begin()
671-
# while it != search.end():
672-
# yield it.get_path().decode('UTF-8')
673-
# preincrement(it)
655+
#########################
656+
#  Searcher #
657+
#########################
674658

675-
def search(self, query: str, start: int = 0, end: int = 10) -> Generator[str, None, None]:
676-
""" Paths of entries in the archive from a search query -> Generator[str, None, None]
659+
cdef class PyQuery:
    """ Search query to hand to `Searcher.search`

        Attributes
        ----------
        c_query : wrapper.Query
            the wrapped C++ Query object
    """
    cdef wrapper.Query c_query

    def set_query(self, query: str):
        """ Sets the string to search for

            Parameters
            ----------
            query : str
                Query string (passed to C++ encoded as UTF-8) """
        cdef bytes encoded = query.encode('utf8')
        self.c_query.setQuery(encoded)
664+
665+
666+
cdef class SearchResultSet:
    """ Iterable set of search results

        Attributes
        ----------
        c_resultset : wrapper.WSearchResultSet
            the wrapped C++ result set
    """
    cdef wrapper.WSearchResultSet c_resultset

    # Factory function (cdef: callable from Cython code only)
    @staticmethod
    cdef from_resultset(wrapper.WSearchResultSet _resultset):
        """ Creates a python SearchResultSet from a C++ WSearchResultSet -> SearchResultSet """
        cdef SearchResultSet wrapped = SearchResultSet()
        wrapped.c_resultset = move(_resultset)
        return wrapped

    def __iter__(self):
        """ Path of each result, one after the other -> Generator[str, None, None] """
        cdef wrapper.SearchIterator it = self.c_resultset.begin()
        cdef wrapper.SearchIterator stop = self.c_resultset.end()
        # C++ iterator pair: walk from begin() until end() is reached.
        while it != stop:
            yield it.getPath().decode('UTF-8')
            preincrement(it)
695681

696-
# cdef wrapper.search_iterator it = search.begin()
697-
# while it != search.end():
698-
# yield it.get_path().decode('UTF-8')
699-
# preincrement(it)
682+
cdef class Search:
    """ Search on a zim archive, as returned by `Searcher.search`

        Attributes
        ----------
        c_search : wrapper.WSearch
            the wrapped C++ search object
    """
    cdef wrapper.WSearch c_search

    # Factory functions - Currently Cython can't use classmethods
    @staticmethod
    cdef from_search(wrapper.WSearch _search):
        """ Creates a python Search from a C++ WSearch -> Search

            Parameters
            ----------
            _search : wrapper.WSearch
                A C++ WSearch; it is moved into the returned object
            Returns
            -------
            Search
                Python object holding the search """
        cdef Search search = Search()
        search.c_search = move(_search)
        return search

    def getEstimatedMatches(self):
        """ Estimated number of results for the search -> int """
        return self.c_search.getEstimatedMatches()

    def getResults(self, start, count):
        """ Set of results for the search -> SearchResultSet

            Parameters
            ----------
            start : int
                Passed as `start` to the C++ getResults
            count : int
                Passed as `count` to the C++ getResults
            Returns
            -------
            SearchResultSet
                Iterating over it gives the path of each result """
        return SearchResultSet.from_resultset(move(self.c_search.getResults(start, count)))
707+
708+
709+
cdef class Searcher:
    """ Zim Archive Searcher

        Attributes
        ----------
        c_searcher : wrapper.WSearcher
            the wrapped C++ searcher object
    """

    cdef wrapper.WSearcher c_searcher

    def __cinit__(self, object archive: PyArchive):
        """ Constructs a Searcher working on an archive

            Parameters
            ----------
            archive : Archive
                Opened archive to search in """

        self.c_searcher = move(wrapper.WSearcher(archive.c_archive))

    def search(self, object query: PyQuery):
        """ Creates a search for a query -> Search

            Parameters
            ----------
            query : Query
                Query to run on the archive
            Returns
            -------
            Search
                Object giving the estimated match count and the results """
        return Search.from_search(move(self.c_searcher.search(query.c_query)))
736732

737-
def __repr__(self) -> str:
738-
return f"{self.__class__.__name__}(filename={self.filename})"

tests/test_libzim_reader.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import libzim.writer
1212
from libzim.reader import Archive
13+
from libzim.searcher import Searcher, Query
1314

1415

1516
# expected data for tests ZIMs (see `all_zims`)
@@ -33,10 +34,10 @@
3334
"entry_count": 0,
3435
"all_entry_count": 2,
3536
"article_count": 0,
36-
"suggestion_string": "",
37+
"suggestion_string": None,
3738
"suggestion_count": 0,
3839
"suggestion_result": [],
39-
"search_string": "",
40+
"search_string": None,
4041
"search_count": 0,
4142
"search_result": [],
4243
"test_path": None,
@@ -124,9 +125,13 @@
124125
"suggestion_string": "favicon",
125126
"suggestion_count": 1,
126127
"suggestion_result": ["favicon.png"],
127-
"search_string": "favicon",
128-
"search_count": 1,
129-
"search_result": ["favicon.png"],
128+
"search_string": "main",
129+
"search_count": 2,
130+
"search_result": [
131+
"Wikibooks.html",
132+
"FreedomBox for Communities_Offline Wikipedia "
133+
+ "- Wikibooks, open books for an open world.html",
134+
],
130135
"test_path": "FreedomBox for Communities_Offline Wikipedia - Wikibooks, "
131136
"open books for an open world.html",
132137
"test_title": "FreedomBox for Communities/Offline Wikipedia - Wikibooks, "
@@ -398,15 +403,20 @@ def test_reader_suggest_search(
398403
assert zim.all_entry_count == all_entry_count
399404
assert zim.article_count == article_count
400405

406+
if search_string is not None:
407+
query = Query()
408+
query.set_query(search_string)
409+
searcher = Searcher(zim)
410+
search = searcher.search(query)
411+
assert search.getEstimatedMatches() == search_count
412+
assert list(search.getResults(0, search_count)) == search_result
401413

402-
# TODO: restore [search-api]
414+
#TODO: restore suggestion search
403415
# assert (
404416
# zim.get_estimated_suggestions_results_count(suggestion_string)
405417
# == suggestion_count
406418
# )
407419
# assert list(zim.suggest(suggestion_string)) == suggestion_result
408-
# assert zim.get_estimated_search_results_count(search_string) == search_count
409-
# assert list(zim.search(search_string)) == search_result
410420

411421

412422
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)