File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -42,10 +42,12 @@ export async function importOCRFiles(ocrFilesAll) {
42
42
const hocrStrAll = await readOcrFile ( ocrFilesAll [ 0 ] ) ;
43
43
44
44
// Check whether input is Abbyy XML
45
+ // TODO: The auto-detection of formats needs to be more robust.
46
+ // At present, any string that contains ">" and "abbyy" is considered Abbyy XML.
45
47
const node2 = hocrStrAll . match ( / > ( [ ^ > ] + ) / ) ?. [ 1 ] ;
46
48
abbyyMode = ! ! node2 && ! ! / a b b y y / i. test ( node2 ) ;
47
49
stextMode = ! ! node2 && ! ! / < d o c u m e n t n a m e / . test ( node2 ) ;
48
- textractMode = ! node2 && ! ! / " A n a l y z e D o c u m e n t M o d e l V e r s i o n " / i. test ( hocrStrAll ) ;
50
+ textractMode = ! abbyyMode && ! stextMode && ! ! / " A n a l y z e D o c u m e n t M o d e l V e r s i o n " / i. test ( hocrStrAll ) ;
49
51
50
52
if ( textractMode ) {
51
53
hocrRaw = [ hocrStrAll ] ;
@@ -68,7 +70,7 @@ export async function importOCRFiles(ocrFilesAll) {
68
70
const hocrStrFirst = await readOcrFile ( ocrFilesAll [ 0 ] ) ;
69
71
const node2 = hocrStrFirst . match ( / > ( [ ^ > ] + ) / ) ?. [ 1 ] ;
70
72
abbyyMode = ! ! node2 && ! ! / a b b y y / i. test ( node2 ) ;
71
- textractMode = ! node2 && ! ! / " A n a l y z e D o c u m e n t M o d e l V e r s i o n " / i. test ( hocrStrFirst ) ;
73
+ textractMode = ! abbyyMode && ! ! / " A n a l y z e D o c u m e n t M o d e l V e r s i o n " / i. test ( hocrStrFirst ) ;
72
74
73
75
for ( let i = 0 ; i < pageCountHOCR ; i ++ ) {
74
76
const hocrFile = ocrFilesAll [ i ] ;
You can’t perform that action at this time.
0 commit comments