@@ -13,8 +13,7 @@ scribe.opt.saveDebugImages = debugMode;
13
13
/**
14
14
* @param {string } func
15
15
* @param {Object } params
16
- * @param {string } [params.pdfFile]
17
- * @param {string } [params.ocrFile]
16
+ * @param {string[] } [params.files]
18
17
* @param {string } [params.outputDir]
19
18
* @param {Array<string> } [params.list]
20
19
* @param {boolean } [params.robustConfMode]
@@ -26,6 +25,10 @@ scribe.opt.saveDebugImages = debugMode;
26
25
async function main ( func , params ) {
27
26
scribe . opt . workerN = params . workerN || null ;
28
27
28
+ if ( ! params . files || params . files . length === 0 ) {
29
+ throw new Error ( 'No input files provided.' ) ;
30
+ }
31
+
29
32
await scribe . init ( {
30
33
pdf : true ,
31
34
ocr : true ,
@@ -39,15 +42,9 @@ async function main(func, params) {
39
42
40
43
const output = { } ;
41
44
42
- const files = [ ] ;
43
- if ( params . pdfFile ) files . push ( params . pdfFile ) ;
44
- if ( params . ocrFile ) files . push ( params . ocrFile ) ;
45
- await scribe . importFiles ( files ) ;
45
+ await scribe . importFiles ( params . files ) ;
46
46
47
- const backgroundArg = params . pdfFile ;
48
- const backgroundStem = backgroundArg ? path . basename ( backgroundArg ) . replace ( / \. \w { 1 , 5 } $ / i, '' ) : undefined ;
49
- const ocrStem = params . ocrFile ? path . basename ( params . ocrFile ) . replace ( / \. \w { 1 , 5 } $ / i, '' ) : undefined ;
50
- const outputStem = backgroundStem || ocrStem || 'output' ;
47
+ const outputStem = scribe . inputData . defaultDownloadFileName . replace ( / \. \w { 1 , 6 } $ / i, '' ) || 'output' ;
51
48
52
49
const outputDir = params . outputDir || '.' ;
53
50
@@ -75,23 +72,30 @@ async function main(func, params) {
75
72
}
76
73
}
77
74
78
- if ( [ 'overlay' , 'recognize' ] . includes ( func ) && backgroundArg ) {
75
+ if ( [ 'overlay' , 'recognize' ] . includes ( func ) && ( scribe . inputData . pdfMode || scribe . inputData . imageMode ) ) {
79
76
let outputSuffix = '' ;
80
77
if ( scribe . opt . displayMode === 'proof' ) {
81
78
outputSuffix = '_vis' ;
82
79
} else if ( scribe . opt . displayMode === 'invis' ) {
83
- const resolvedInputFile = path . dirname ( path . resolve ( backgroundArg ) ) ;
84
- const resolvedOutputDir = path . resolve ( outputDir ) ;
85
- if ( resolvedInputFile === resolvedOutputDir ) {
86
- outputSuffix = '_ocr' ;
80
+
81
+ // Check if output file would overwrite any input file, and if so, add a suffix to avoid overwriting.
82
+ // This software is still in development--nobody should be overwriting input files.
83
+ const resolvedOutputFileTmp = path . resolve ( `${ outputDir } /${ outputStem } .pdf` ) ;
84
+ for ( let i = 0 ; i < params . files . length ; i ++ ) {
85
+ const resolvedInputFile = path . resolve ( params . files [ i ] ) ;
86
+ if ( resolvedInputFile === resolvedOutputFileTmp ) {
87
+ outputSuffix = '_ocr' ;
88
+ console . log ( `Saving output with ${ outputSuffix } suffix to avoid overwriting input: ${ resolvedInputFile } ` ) ;
89
+ break ;
90
+ }
87
91
}
88
92
}
89
93
90
- const outputPath = `${ outputDir } /${ path . basename ( backgroundArg ) . replace ( / \. \w { 1 , 5 } $ / i , ` $ {outputSuffix } .pdf`) } ` ;
94
+ const outputPath = path . resolve ( `${ outputDir } /${ outputStem } $ {outputSuffix } .pdf` ) ;
91
95
await scribe . download ( 'pdf' , outputPath ) ;
92
96
93
97
if ( params . hocr ) {
94
- const outputPathHocr = `${ outputDir } /${ path . basename ( backgroundArg ) . replace ( / \. \w { 1 , 5 } $ / i , ' .hocr' ) } ` ;
98
+ const outputPathHocr = path . resolve ( `${ outputDir } /${ outputStem } .hocr` ) ;
95
99
await scribe . download ( 'hocr' , outputPathHocr ) ;
96
100
}
97
101
}
@@ -123,63 +127,59 @@ async function main(func, params) {
123
127
/**
124
128
* Print confidence of Abbyy .xml file.
125
129
*
126
- * @param {string } ocrFile
130
+ * @param {string[] } files - Paths to input files.
127
131
*/
128
- export const conf = async ( ocrFile ) => ( main ( 'conf' , { ocrFile } ) ) ;
132
+ export const conf = async ( files ) => ( main ( 'conf' , { files } ) ) ;
129
133
130
134
/**
131
135
*
132
- * @param {string } pdfFile - Path to PDF file.
133
- * @param {string } ocrFile
136
+ * @param {string[] } files - Paths to input files.
134
137
* @param {Object } options
135
138
* @param {number } [options.workers]
136
139
*/
137
- export const check = async ( pdfFile , ocrFile , options ) => ( main ( 'check' , { pdfFile , ocrFile , workerN : options ?. workers } ) ) ;
140
+ export const check = async ( files , options ) => ( main ( 'check' , { files , workerN : options ?. workers } ) ) ;
138
141
139
142
/**
140
143
* Evaluate internal OCR engine.
141
144
*
142
- * @param {string } pdfFile - Path to PDF file.
143
- * @param {string } ocrFile - Path to OCR file containing ground truth.
145
+ * @param {string[] } files - Paths to input files.
144
146
* @param {Object } options
145
147
* @param {number } [options.workers]
146
148
*/
147
- export const evalInternal = async ( pdfFile , ocrFile , options ) => ( main ( 'eval' , { pdfFile , ocrFile , workerN : options ?. workers } ) ) ;
149
+ export const evalInternal = async ( files , options ) => ( main ( 'eval' , { files , workerN : options ?. workers } ) ) ;
148
150
149
151
/**
150
152
*
151
- * @param {string } pdfFile - Path to PDF file.
152
- * @param {* } ocrFile
153
+ * @param {string[] } files - Paths to input files.
153
154
* @param {* } outputDir
154
155
* @param {Object } options
155
156
* @param {boolean } [options.robust]
156
157
* @param {boolean } [options.conf]
157
158
* @param {"eval" | "ebook" | "proof" | "invis" } [options.overlayMode]
158
159
* @param {number } [options.workers]
159
160
*/
160
- export const overlay = async ( pdfFile , ocrFile , outputDir , options ) => ( main ( 'overlay' , {
161
- pdfFile , ocrFile , outputDir, robustConfMode : options ?. robust || false , printConf : options ?. conf || false , overlayMode : options ?. overlayMode || 'invis' , workerN : options ?. workers ,
161
+ export const overlay = async ( files , outputDir , options ) => ( main ( 'overlay' , {
162
+ files , outputDir, robustConfMode : options ?. robust || false , printConf : options ?. conf || false , overlayMode : options ?. overlayMode || 'invis' , workerN : options ?. workers ,
162
163
} ) ) ;
163
164
164
165
/**
165
166
*
166
- * @param {string } pdfFile - Path to PDF file .
167
+ * @param {string[] } files - Paths to input files .
167
168
* @param {Object } options
168
169
* @param {"eval" | "ebook" | "proof" | "invis" } [options.overlayMode]
169
170
* @param {boolean } [options.hocr]
170
171
* @param {number } [options.workers]
171
172
*/
172
- export const recognize = async ( pdfFile , options ) => ( main ( 'recognize' , {
173
- pdfFile , overlayMode : options ?. overlayMode || 'invis' , workerN : options ?. workers , hocr : options ?. hocr ,
173
+ export const recognize = async ( files , options ) => ( main ( 'recognize' , {
174
+ files , overlayMode : options ?. overlayMode || 'invis' , workerN : options ?. workers , hocr : options ?. hocr ,
174
175
} ) ) ;
175
176
176
177
/**
177
178
*
178
- * @param {string } pdfFile - Path to PDF file .
179
+ * @param {string[] } files - Paths to input files .
179
180
* @param {* } outputDir
180
181
* @param {* } options
181
- * @returns
182
182
*/
183
- export const debug = async ( pdfFile , outputDir , options ) => ( main ( 'debug' , {
184
- pdfFile , outputDir, list : options ?. list ,
183
+ export const debug = async ( files , outputDir , options ) => ( main ( 'debug' , {
184
+ files , outputDir, list : options ?. list ,
185
185
} ) ) ;
0 commit comments