Skip to content

Commit 9b6b504

Browse files
committed
Updated mupdf to improve word segmentation
1 parent d8fdd48 commit 9b6b504

File tree

3 files changed

+19
-3
lines changed

3 files changed

+19
-3
lines changed

mupdf/libmupdf.js

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -626,13 +626,13 @@ function unexportedRuntimeSymbol(sym) {
626 | 626 |   }
627 | 627 |
628 | 628 |   var ASM_CONSTS = {
629 |     | - 2200888: () => {
    | 629 | + 2200904: () => {
630 | 630 |   throw new libmupdf.TryLaterError("operation in progress");
631 | 631 |   },
632 |     | - 2200951: $0 => {
    | 632 | + 2200967: $0 => {
633 | 633 |   throw new Error(UTF8ToString($0));
634 | 634 |   },
635 |     | - 2200990: () => {
    | 635 | + 2201006: () => {
636 | 636 |   throw new Error("Cannot create MuPDF context!");
637 | 637 |   }
638 | 638 |   };

mupdf/libmupdf.wasm

197 Bytes
Binary file not shown.

tests/module/importPdfText.spec.js

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -428,3 +428,19 @@ describe('Check that font style is detected for PDF imports.', function () {
428 | 428 |   await scribe.terminate();
429 | 429 |   });
430 | 430 |   }).timeout(120000);
    | 431 | +
    | 432 | + describe('Check that symbols are detected for PDF imports.', function () {
    | 433 | + this.timeout(10000);
    | 434 | +
    | 435 | + it('Symbols are not combined with words', async () => {
    | 436 | + scribe.opt.usePDFText.native.main = true;
    | 437 | + // An earlier version combined the checkbox with the first word.
    | 438 | + await scribe.importFiles([`${ASSETS_PATH_KARMA}/high-risk_protection_order_application_for_and_declaration_in_support_of_mandatory_use.pdf`]);
    | 439 | + assert.strictEqual(scribe.data.ocr.active[0].lines[9].words.length, 4);
    | 440 | + assert.strictEqual(scribe.data.ocr.active[0].lines[9].words[1].text, 'Attorney,');
    | 441 | + }).timeout(10000);
    | 442 | +
    | 443 | + after(async () => {
    | 444 | + await scribe.terminate();
    | 445 | + });
    | 446 | + }).timeout(120000);

0 commit comments

Comments
 (0)