Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/mupdf/pdf/document.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ typedef struct
int do_use_objstms; /* Use objstms if possible */
int compression_effort; /* 0 for default. 100 = max, 1 = min. */
int do_labels; /* Add labels to each object showing how it can be reached from the Root. */
int do_strip_invisible_text; /* Strip invisible text, requires sanitize. */
} pdf_write_options;

FZ_DATA extern const pdf_write_options pdf_default_write_options;
Expand Down
41 changes: 41 additions & 0 deletions source/pdf/pdf-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,7 @@ const char *fz_pdf_write_options_usage =
"\tlinearize: optimize for web browsers (no longer supported!)\n"
"\tclean: pretty-print graphics commands in content streams\n"
"\tsanitize: sanitize graphics commands in content streams\n"
"\tstrip-invisible-text: strip invisible text in content streams\n"
"\tgarbage: garbage collect unused objects\n"
"\tor garbage=compact: ... and compact cross reference table\n"
"\tor garbage=deduplicate: ... and remove duplicate objects\n"
Expand Down Expand Up @@ -1930,6 +1931,8 @@ pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *ar
opts->do_clean = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "sanitize", &val))
opts->do_sanitize = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "strip-invisible-text", &val))
opts->do_strip_invisible_text = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "incremental", &val))
opts->do_incremental = fz_option_eq(val, "yes");
if (fz_has_option(ctx, args, "objstms", &val))
Expand Down Expand Up @@ -2755,6 +2758,37 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, cons
do_pdf_save_document(ctx, doc, &opts, in_opts);
}

/*
	Strip invisible text from every page of the document.

	For each page a redaction annotation is created, its rectangle is
	set to the page's media box, and the page's redactions are then
	applied using options that select PDF_REDACT_IMAGE_NONE,
	PDF_REDACT_LINE_ART_NONE and PDF_REDACT_TEXT_REMOVE_INVISIBLE.

	On error the page and annotation currently held are released and
	the exception is rethrown to the caller.
*/
static void pdf_strip_invisible_text(fz_context *ctx, pdf_document *doc)
{
	int i;
	int n = pdf_count_pages(ctx, doc);
	fz_rect rect;
	pdf_annot *annot = NULL;
	pdf_page *page = NULL;
	pdf_redact_options opts = { 0, PDF_REDACT_IMAGE_NONE, PDF_REDACT_LINE_ART_NONE, PDF_REDACT_TEXT_REMOVE_INVISIBLE };

	/* Both are assigned inside fz_try and read in fz_catch. */
	fz_var(page);
	fz_var(annot);

	fz_try(ctx)
	{
		for (i = 0; i < n; i++)
		{
			page = pdf_load_page(ctx, doc, i);
			/* Per the MuPDF API docs, the returned annotation reference
			 * is owned by the caller and must be dropped. */
			annot = pdf_create_annot(ctx, page, PDF_ANNOT_REDACT);
			rect = pdf_bound_page(ctx, page, FZ_MEDIA_BOX);
			pdf_set_annot_rect(ctx, annot, rect);
			pdf_redact_page(ctx, doc, page, &opts);

			/* Release our references and clear the pointers so the
			 * catch block cannot drop them a second time. */
			pdf_drop_annot(ctx, annot);
			annot = NULL;
			pdf_drop_page(ctx, page);
			page = NULL;
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_annot(ctx, annot);
		pdf_drop_page(ctx, page);
		fz_rethrow(ctx);
	}
}

void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, const pdf_write_options *in_opts)
{
pdf_write_options opts_defaults = pdf_default_write_options;
Expand All @@ -2776,6 +2810,8 @@ void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename,
fz_throw(ctx, FZ_ERROR_ARGUMENT, "Linearisation is no longer supported");
if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't do incremental writes when changing encryption");
if (in_opts->do_strip_invisible_text && in_opts->do_sanitize == 0)
fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't strip invisible text without sanitizing");
if (in_opts->do_snapshot)
{
if (in_opts->do_incremental == 0 ||
Expand All @@ -2793,6 +2829,9 @@ void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename,
fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't use these options when snapshotting!");
}

if (in_opts->do_strip_invisible_text)
pdf_strip_invisible_text(ctx, doc);

if (in_opts->do_appearance > 0)
{
int i, n = pdf_count_pages(ctx, doc);
Expand Down Expand Up @@ -2885,6 +2924,8 @@ pdf_format_write_options(fz_context *ctx, char *buffer, size_t buffer_len, const
ADD_OPT("clean=yes");
if (opts->do_sanitize)
ADD_OPT("sanitize=yes");
if (opts->do_strip_invisible_text)
ADD_OPT("strip-invisible-text=yes");
if (opts->do_incremental)
ADD_OPT("incremental=yes");
if (opts->do_encrypt == PDF_ENCRYPT_NONE)
Expand Down
3 changes: 2 additions & 1 deletion source/tools/pdfclean.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ int pdfclean_main(int argc, char **argv)
opts.write = pdf_default_write_options;
opts.write.dont_regenerate_id = 1;

while ((c = fz_getopt_long(argc, argv, "ade:fgilmp:stcvzDAE:LO:U:P:SZ", longopts)) != -1)
while ((c = fz_getopt_long(argc, argv, "ade:fgiIlmp:stcvzDAE:LO:U:P:SZ", longopts)) != -1)
{
switch (c)
{
Expand All @@ -144,6 +144,7 @@ int pdfclean_main(int argc, char **argv)
case 'z': opts.write.do_compress += 1; break;
case 'f': opts.write.do_compress_fonts += 1; break;
case 'i': opts.write.do_compress_images += 1; break;
case 'I': opts.write.do_strip_invisible_text += 1; break;
case 'a': opts.write.do_ascii += 1; break;
case 'e': opts.write.compression_effort = fz_atoi(fz_optarg); break;
case 'g': opts.write.do_garbage += 1; break;
Expand Down