@@ -2,27 +2,34 @@ import fs from 'node:fs';
2
2
import path from 'node:path' ;
3
3
import scribe from '../scribe.js' ;
4
4
5
+ // TODO: Consider whether this should exist and whether it should be combined into a larger CLI utility.
6
+ // This was originally created to provide a simple interface to extract existing text from a PDF file;
7
+ // however, it now does other things, and this should likely be part of a larger `convert` utility.
8
+
5
9
/**
6
10
*
7
- * @param {string } pdfFile - Path to PDF file.
11
+ * @param {string } inputFile - Path to input file.
8
12
* @param {?string } [output='.'] - Output file or directory.
9
13
* @param {Object } [options]
10
14
* @param {Parameters<typeof scribe.download>[0] } [options.format]
11
15
* @param {boolean } [options.reflow]
12
16
*/
13
- export const extract = async ( pdfFile , output , options ) => {
17
+ export const extract = async ( inputFile , output , options ) => {
14
18
const format = options ?. format || 'txt' ;
15
19
16
20
output = output || '.' ;
17
21
const outputDir = path . dirname ( output ) ;
18
- const outputFile = outputDir === output ? `${ path . basename ( pdfFile ) . replace ( / \. \w { 1 , 6 } $ / i, `.${ format } ` ) } ` : path . basename ( output ) ;
22
+ const outputFile = outputDir === output ? `${ path . basename ( inputFile ) . replace ( / \. \w { 1 , 6 } $ / i, `.${ format } ` ) } ` : path . basename ( output ) ;
19
23
const outputPath = `${ outputDir } /${ outputFile } ` ;
20
24
21
25
scribe . opt . reflow = true ;
22
26
scribe . opt . extractText = true ;
27
+ scribe . opt . displayMode = 'ebook' ;
23
28
24
- await scribe . init ( ) ;
25
- await scribe . importFiles ( [ pdfFile ] ) ;
29
+ // TODO: Fonts do not need to be loaded for .txt output, but are needed for .pdf output,
30
+ // so a more robust implementation would consider the arguments and only load fonts if necessary.
31
+ await scribe . init ( { font : true } ) ;
32
+ await scribe . importFiles ( [ inputFile ] ) ;
26
33
27
34
if ( outputDir ) fs . mkdirSync ( outputDir , { recursive : true } ) ;
28
35
0 commit comments