@@ -783,3 +783,46 @@ def test_creator_badfilename(tmpdir):
783783 # forward slash points to non-existing folder
784784 with pytest .raises (IOError ):
785785 Creator (tmpdir / "test/test.zim" )
786+
787+
def test_accented_search_from_libzim(fpath):
    """Check that accented and plain queries yield the same search results.

    Python port of the accented search test from libzim:
    https://github.com/openzim/libzim/blob/main/test/search.cpp#L290 (88543b00)
    """
    # (path, title, content) for each HTML article written to the archive;
    # the first one deliberately contains accented characters.
    documents = (
        ("path0", "Test Article0", "This is a tèst articlé. temp0"),
        ("path1", "Test Article1", "This is another test article. For article1."),
    )

    # Full-text indexing must be enabled for the searches below to work.
    with Creator(fpath).config_verbose(True).config_indexing(True, "eng") as writer:
        for path, title, content in documents:
            writer.add_item(
                StaticItem(
                    path=path,
                    title=title,
                    content=content,
                    mimetype="text/html",
                )
            )

    # The archive is opened only after the creator context has exited.
    archive = Archive(fpath)

    assert archive.entry_count == 2
    assert archive.article_count == 2
    assert archive.all_entry_count == 7

    expected_paths = ["path0", "path1"]

    # Same expectations for the plain-ASCII query and for the accented one.
    for text in ("test article", "test àrticlé"):
        query = Query().set_query(text)
        searcher = Searcher(archive)
        search = searcher.search(query)
        assert search.getEstimatedMatches() == archive.article_count
        assert list(search.getResults(0, archive.article_count)) == expected_paths
0 commit comments