Skip to content

Commit 0b1d10f

Browse files
committed
search: Replace third-party OpenDocument search helper.
The odt2txt package is extremely slow when working with larger documents, and has not seen an update in 7+ years. Fixes #3481
1 parent c4a14be commit 0b1d10f

File tree

5 files changed

+53
-6
lines changed

5 files changed

+53
-6
lines changed

debian/control

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ Depends:
9494
poppler-utils,
9595
exif,
9696
id3,
97-
odt2txt,
9897
catdoc,
9998
untex,
10099
html2text,

search-helpers/meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ ppt_to_txt = executable('nemo-ppt-to-txt',
2121
)
2222

2323
install_data(
24-
'nemo-xls-to-txt',
24+
['nemo-xls-to-txt', 'nemo-odf-to-txt'],
2525
install_dir: join_paths(get_option('prefix'), get_option('bindir')),
2626
install_mode: 'rwxr-xr-x'
2727
)
@@ -31,6 +31,7 @@ install_data(
3131
'mso.nemo_search_helper',
3232
'mso-ppt.nemo_search_helper',
3333
'mso-xls.nemo_search_helper',
34+
'odf.nemo_search_helper',
3435
install_dir: join_paths(nemoDataPath, 'search-helpers')
3536
)
3637

search-helpers/nemo-odf-to-txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/python3
2+
3+
import sys
4+
import zipfile
5+
from pathlib import Path
6+
from html.parser import HTMLParser
7+
8+
class Parser(HTMLParser):
    """Collect text fragments from OpenDocument XML fed through HTMLParser.

    Any start tag whose name begins with ``text`` or ``meta`` marks the next
    chunk of character data for capture.  Captured chunks are accumulated in
    ``self.parsed``.  A ``text:s`` tag additionally requests that the next
    chunk be joined onto the previous entry with a single space instead of
    starting a new entry.

    NOTE(review): indentation was reconstructed from a flattened listing;
    the nesting of the final ``get_next_data`` reset in ``handle_data``
    should be confirmed against the real file.
    """

    def __init__(self, *, convert_charrefs=True):
        super().__init__(convert_charrefs=convert_charrefs)
        # Per-instance state: a class-level list would be shared (and
        # mutated) by every Parser instance.
        self.parsed = []
        self.get_next_data = False
        self.concat_next_data = False

    def handle_starttag(self, tag, attrs):
        """Arm capture of the next data chunk for ``text*``/``meta*`` tags."""
        if tag.startswith(("text", "meta")):
            if tag == "text:s":
                self.concat_next_data = True
            self.get_next_data = True

    def handle_endtag(self, tag):
        """End tags carry no information this parser needs."""
        pass

    def handle_data(self, data):
        """Store *data* if a preceding start tag armed capture.

        A chunk that is exactly one newline is skipped.  The capture flag is
        cleared after any chunk seen while armed.
        """
        if not self.get_next_data:
            return

        if data != "\n":
            if self.concat_next_data and self.parsed:
                # Join onto the previous fragment with one space.
                self.parsed[-1] += " " + data
            else:
                # Either a normal fragment, or a concat request with nothing
                # to join onto yet (previously an IndexError).
                self.parsed.append(data.strip())
            self.concat_next_data = False

        self.get_next_data = False
31+
32+
# Entry point: extract searchable text from the OpenDocument file named by
# the first command-line argument and print it to stdout, one line per
# processed archive member ("meta.xml" and "content.xml").
if len(sys.argv) < 2:
    print(f"usage: {Path(sys.argv[0]).name} FILE", file=sys.stderr)
    sys.exit(1)

path = sys.argv[1]

parser = Parser()

try:
    # Use a distinct name for the archive so the `zipfile` module is not
    # shadowed, and a context manager so the file handle is always closed.
    with zipfile.ZipFile(path) as archive:
        for f in archive.infolist():
            if f.filename not in ("meta.xml", "content.xml"):
                continue

            contents = archive.read(f.filename).decode()
            parser.feed(contents)
            if not parser.parsed:
                continue

            out_str = ", ".join(parser.parsed)
            print(f"{Path(f.filename).stem}: {out_str}\n", flush=True, file=sys.stdout)
            # Start the next archive member with an empty fragment list.
            parser.parsed = []
except (OSError, zipfile.BadZipFile) as err:
    # Unreadable or non-zip input: report briefly instead of a traceback.
    print(f"{path}: {err}", file=sys.stderr)
    sys.exit(1)

# sys.exit rather than the site-provided exit(), which is not guaranteed to
# exist (e.g. when Python runs with -S).
sys.exit(0)
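search-helpers/odf.nemo_search_helper [file header missing from this capture; path inferred from the meson.build changes — presumably renamed from search-helpers/third-party/libreoffice.nemo_search_helper]

Original file line numberDiff line numberDiff line change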
@@ -1,5 +1,5 @@
11
[Nemo Search Helper]
2-
TryExec=odt2txt;
3-
Exec=odt2txt %s
2+
TryExec=nemo-odf-to-txt;
3+
Exec=nemo-odf-to-txt %s
44
MimeType=application/vnd.oasis.opendocument.text;application/vnd.oasis.opendocument.spreadsheet;application/vnd.oasis.opendocument.presentation;application/vnd.oasis.opendocument.graphics;
5-
priority=100
5+
Priority=100

search-helpers/third-party/meson.build

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ helpers = [
22
'untex.nemo_search_helper',
33
'exif.nemo_search_helper',
44
'id3.nemo_search_helper',
5-
'libreoffice.nemo_search_helper',
65
'mso-doc.nemo_search_helper',
76
'pdf2txt.nemo_search_helper',
87
'pdftotext.nemo_search_helper',

0 commit comments

Comments
 (0)