Skip to content

Commit 6e7254f

Browse files
committed
Search: Add .ppt helper.
1 parent 8170aa6 commit 6e7254f

File tree

3 files changed

+282
-1
lines changed

3 files changed

+282
-1
lines changed

search-helpers/meson.build

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,25 @@ mso_to_txt_sources = [
44
'nemo-mso-to-txt.c'
55
]
66

7-
watcher = executable('nemo-mso-to-txt',
7+
mso_to_txt = executable('nemo-mso-to-txt',
88
mso_to_txt_sources,
99
dependencies: [libgsf, gio, glib],
1010
install: true
1111
)
1212

13+
ppt_to_txt_sources = [
14+
'nemo-ppt-to-txt.c'
15+
]
16+
17+
ppt_to_txt = executable('nemo-ppt-to-txt',
18+
ppt_to_txt_sources,
19+
dependencies: [gio, glib],
20+
install: true
21+
)
22+
1323
install_data(
1424
'mso.nemo_search_helper',
25+
'mso-ppt.nemo_search_helper',
1526
install_dir: join_paths(nemoDataPath, 'search-helpers')
1627
)
1728

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[Nemo Search Helper]
2+
TryExec=nemo-ppt-to-txt;libreoffice;
3+
Exec=nemo-ppt-to-txt %s
4+
MimeType=application/vnd.ms-powerpoint;
5+
Priority=100

search-helpers/nemo-ppt-to-txt.c

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
/* Nemo is free software; you can redistribute it and/or
2+
* modify it under the terms of the GNU General Public License as
3+
* published by the Free Software Foundation; either version 2 of the
4+
* License, or (at your option) any later version.
5+
*
6+
* Nemo is distributed in the hope that it will be useful,
7+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
8+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9+
* General Public License for more details.
10+
*
11+
* You should have received a copy of the GNU General Public
12+
* License along with this program; see the file COPYING. If not,
13+
* write to the Free Software Foundation, Inc., 51 Franklin Street - Suite 500,
14+
* Boston, MA 02110-1335, USA.
15+
*/
16+
17+
#include <stdlib.h>
18+
#include <glib.h>
19+
#include <gio/gio.h>
20+
#include <glib/gprintf.h>
21+
22+
static void
23+
cleanup_tmp_dir (const gchar *tmp_dir,
24+
GFile *xml_file)
25+
{
26+
if (tmp_dir == NULL || xml_file == NULL) {
27+
return;
28+
}
29+
30+
GFile *parent;
31+
32+
parent = g_file_get_parent (xml_file);
33+
34+
g_file_delete (xml_file, NULL, NULL);
35+
g_file_delete (parent, NULL, NULL);
36+
37+
g_object_unref (parent);
38+
}
39+
40+
static gchar *
41+
get_tmp_dir (GError **error)
42+
{
43+
gchar *tmp_dir = NULL;
44+
45+
// Create our temp dir in /dev/shm if it's available,
46+
// otherwise use whatever glib ends up with (/tmp probably).
47+
48+
if (g_file_test ("/dev/shm", G_FILE_TEST_IS_DIR)) {
49+
gchar *old_env_tmp;
50+
51+
old_env_tmp = g_strdup (g_getenv ("TMPDIR"));
52+
if (g_setenv ("TMPDIR", "/dev/shm", TRUE)) {
53+
tmp_dir = g_dir_make_tmp ("nemo-search-helper-XXXXXX", NULL);
54+
55+
if (old_env_tmp != NULL) {
56+
g_setenv ("TMPDIR", old_env_tmp, TRUE);
57+
g_free (old_env_tmp);
58+
} else {
59+
g_unsetenv ("TMPDIR");
60+
}
61+
}
62+
}
63+
64+
if (tmp_dir != NULL) {
65+
return tmp_dir;
66+
}
67+
68+
return g_dir_make_tmp ("nemo-search-helper-XXXXXX", error);
69+
}
70+
71+
gchar *
72+
run_regex_replace (const gchar *pattern,
73+
gchar *input,
74+
const gchar *replacement,
75+
GError **error)
76+
{
77+
GRegex *re;
78+
gchar *out;
79+
80+
out = NULL;
81+
82+
re = g_regex_new (pattern,
83+
G_REGEX_OPTIMIZE,
84+
0,
85+
error);
86+
87+
if (re == NULL) {
88+
return NULL;
89+
}
90+
91+
out = g_regex_replace_literal (re, input, -1, 0, replacement, 0, error);
92+
g_free (input);
93+
g_regex_unref (re);
94+
95+
return out;
96+
}
97+
98+
int
99+
main (int argc, char *argv[])
100+
{
101+
if (argc < 2) {
102+
g_printerr ("Need a filename\n");
103+
return 1;
104+
}
105+
106+
GSubprocess *lo_proc;
107+
GFile *xml_file;
108+
GError *error;
109+
110+
gchar *tmp_dir = NULL;
111+
gchar *name_only = NULL;
112+
gchar *ptr;
113+
gchar *orig_file_path = NULL, *orig_basename = NULL;
114+
gchar *xml_file_path = NULL, *xml_basename = NULL;
115+
gchar *content = NULL;
116+
117+
gint retval;
118+
gsize length;
119+
120+
orig_file_path = g_strdup (argv[1]);
121+
122+
orig_basename = g_path_get_basename (orig_file_path);
123+
ptr = g_strrstr (orig_basename, ".");
124+
125+
name_only = g_strndup (orig_basename, ptr - orig_basename);
126+
g_free (orig_basename);
127+
128+
retval = 0;
129+
xml_file = NULL;
130+
error = NULL;
131+
tmp_dir = get_tmp_dir (&error);
132+
133+
if (tmp_dir == NULL) {
134+
if (error != NULL) {
135+
g_warning ("Could not create a temp dir for conversion: %s", error->message);
136+
g_clear_error (&error);
137+
}
138+
139+
retval = 1;
140+
goto out;
141+
}
142+
143+
gchar *lo_args[7] = {
144+
"libreoffice",
145+
"--convert-to", "xml",
146+
"--outdir", tmp_dir,
147+
orig_file_path,
148+
NULL
149+
};
150+
151+
lo_proc = g_subprocess_newv ((const gchar * const *) lo_args,
152+
G_SUBPROCESS_FLAGS_STDERR_SILENCE | G_SUBPROCESS_FLAGS_STDOUT_SILENCE,
153+
&error);
154+
155+
if (lo_proc == NULL) {
156+
if (error != NULL) {
157+
g_warning ("Could not lauch headless libreoffice for conversion: %s", error->message);
158+
g_clear_error (&error);
159+
}
160+
retval = 1;
161+
goto out;
162+
}
163+
164+
g_subprocess_wait (lo_proc, NULL, &error);
165+
g_object_unref (lo_proc);
166+
g_free (orig_file_path);
167+
168+
if (error != NULL) {
169+
g_warning ("LibreOffice was unable to convert ppt to xml: %s", error->message);
170+
g_clear_error (&error);
171+
retval = 1;
172+
goto out;
173+
}
174+
175+
xml_basename = g_strconcat (name_only, ".xml", NULL);
176+
xml_file_path = g_build_filename (tmp_dir, xml_basename, NULL);
177+
178+
xml_file = g_file_new_for_path (xml_file_path);
179+
180+
if (!g_file_load_contents (xml_file,
181+
NULL,
182+
&content,
183+
&length,
184+
NULL,
185+
&error)) {
186+
if (error != NULL) {
187+
g_warning ("Unable to read xml file: %s", error->message);
188+
g_clear_error (&error);
189+
retval = 1;
190+
goto out;
191+
}
192+
}
193+
194+
// remove doc settings which has content but is uninteresting
195+
content = run_regex_replace ("<office:settings>[\\s\\S]*?</office:settings>",
196+
content,
197+
"",
198+
&error);
199+
200+
if (content == NULL) {
201+
goto out;
202+
}
203+
204+
// remove any binary data content like embedded images
205+
content = run_regex_replace ("<office:binary-data>[\\s\\S]*?</office:binary-data>",
206+
content,
207+
"",
208+
&error);
209+
210+
if (content == NULL) {
211+
goto out;
212+
}
213+
214+
// remove any escaped markup as content
215+
content = run_regex_replace ("&lt;[\\s\\S]*?&gt;",
216+
content,
217+
"",
218+
&error);
219+
220+
if (content == NULL) {
221+
goto out;
222+
}
223+
224+
// remove all remaining markup
225+
content = run_regex_replace ("<[^>]+>",
226+
content,
227+
" ",
228+
&error);
229+
230+
if (content == NULL) {
231+
goto out;
232+
}
233+
234+
// remove excess whitespace, replace with a single space
235+
content = run_regex_replace ("\\s+",
236+
content,
237+
" ",
238+
&error);
239+
240+
if (content == NULL) {
241+
goto out;
242+
}
243+
244+
g_printf ("%s", content);
245+
246+
out:
247+
g_free (content);
248+
g_free (name_only);
249+
250+
g_free (xml_basename);
251+
g_free (xml_file_path);
252+
253+
if (error != NULL)
254+
{
255+
g_critical ("Could not extract strings from ppt file: %s", error->message);
256+
g_error_free (error);
257+
retval = 1;
258+
}
259+
260+
cleanup_tmp_dir (tmp_dir, xml_file);
261+
g_clear_object (&xml_file);
262+
g_free (tmp_dir);
263+
264+
return retval;
265+
}

0 commit comments

Comments
 (0)