-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.js
More file actions
90 lines (74 loc) · 2.17 KB
/
index.js
File metadata and controls
90 lines (74 loc) · 2.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Cached pdf.js module. It stays null until PDF() loads it on first use and
// is then reused by every later call.
var PDFJS = null;
/**
 * Default page renderer: flattens the text items of one page into a string.
 *
 * Items are appended in the order returned by `getTextContent`. A newline is
 * inserted whenever an item's `transform[5]` (tracked here as the item's Y
 * position) differs from that of the previous item.
 *
 * @param {object} pageData - Page object exposing `getTextContent(options)`,
 *   which resolves to `{ items: [{ str, transform }] }`.
 * @returns {Promise<string>} The page text.
 */
async function render_page(pageData) {
  const renderOptions = {
    normalizeWhitespace: false,
    disableCombineTextItems: false,
  };
  const textContent = await pageData.getTextContent(renderOptions);

  let lastY;
  let text = "";
  for (const item of textContent.items) {
    const currentY = item.transform[5];
    // `lastY === undefined` means "no previous item". A truthiness check
    // would also match a previous item at Y === 0 and lose the line break
    // that follows it.
    if (lastY === undefined || lastY === currentY) {
      text += item.str;
    } else {
      text += `\n${item.str}`;
    }
    lastY = currentY;
  }
  return text;
}
// Fallback values PDF() uses for any option that is missing or has the wrong type.
const DEFAULT_OPTIONS = {
// Callback that receives one page object and returns (a promise of) its text.
pagerender: render_page,
// Maximum number of pages to render; 0 or less means every page.
max: 0,
// Directory name of the pdf.js build to load (./pdf.js/<version>/build/pdf.js).
version: "v1.10.100",
};
/**
 * Parse a PDF and collect its page count, metadata and extracted text.
 *
 * @param {*} dataBuffer - PDF data, passed unchanged to `PDFJS.getDocument`.
 * @param {object} [options] - Optional settings; the object is never mutated.
 * @param {function} [options.pagerender] - Called with each page object and
 *   expected to return the page text (or a promise of it). Falls back to
 *   `DEFAULT_OPTIONS.pagerender` when not a function.
 * @param {number} [options.max] - Maximum number of pages to render. Values
 *   that are not a positive finite number mean "all pages".
 * @param {string} [options.version] - pdf.js build directory to load;
 *   `"default"` or a non-string selects `DEFAULT_OPTIONS.version`.
 * @returns {Promise<object>} `{ numpages, numrender, info, metadata,
 *   pageTextArray, text, version }`. `text` is every rendered page prefixed
 *   with `"\n\n"`; `pageTextArray` holds the same page strings individually.
 * @throws Rejects if the pdf.js build cannot be loaded or the document cannot
 *   be opened. Metadata and per-page failures are tolerated (see below).
 */
async function PDF(dataBuffer, options) {
  // `typeof null` is "object", so null must be handled explicitly or the
  // property reads below would throw.
  const given = options == null ? DEFAULT_OPTIONS : options;

  // Resolve the effective settings into locals instead of writing the
  // defaults back onto the caller's object.
  const pagerender =
    typeof given.pagerender === "function"
      ? given.pagerender
      : DEFAULT_OPTIONS.pagerender;
  const max = typeof given.max === "number" ? given.max : DEFAULT_OPTIONS.max;
  const version =
    typeof given.version === "string" && given.version !== "default"
      ? given.version
      : DEFAULT_OPTIONS.version;

  const ret = {
    numpages: 0,
    numrender: 0,
    info: null,
    metadata: null,
    pageTextArray: [],
    text: "",
    version: null,
  };

  // The pdf.js module is loaded once and cached at module level.
  // NOTE(review): because of that cache, `version` only has an effect on the
  // first call in a process — confirm this is intended.
  if (!PDFJS) {
    PDFJS = require(`./pdf.js/${version}/build/pdf.js`);
  }
  ret.version = PDFJS.version;
  PDFJS.disableWorker = true;

  const doc = await PDFJS.getDocument(dataBuffer);
  try {
    ret.numpages = doc.numPages;

    // Metadata is best-effort: a failure leaves info/metadata as null.
    let metaData = null;
    try {
      metaData = await doc.getMetadata();
    } catch (err) {
      metaData = null;
    }
    ret.info = metaData ? metaData.info : null;
    ret.metadata = metaData ? metaData.metadata : null;

    // Clamp to the document length and to a whole number so that
    // `numrender` always equals the number of pages actually rendered
    // (NaN or fractional `max` previously produced a mismatching count).
    const pageCount =
      Number.isFinite(max) && max > 0
        ? Math.min(Math.floor(max), doc.numPages)
        : doc.numPages;

    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
      let pageText;
      try {
        const pageData = await doc.getPage(pageNumber);
        // Keep the options object as `this`, as a method call on it would.
        pageText = await pagerender.call(given, pageData);
      } catch (err) {
        // Best-effort: a page that fails to load or render contributes an
        // empty string rather than aborting the whole document.
        pageText = "";
      }
      ret.text = `${ret.text}\n\n${pageText}`;
      ret.pageTextArray.push(`${pageText}`);
    }
    ret.numrender = pageCount;
  } finally {
    // Always release the document. The return value is intentionally not
    // awaited, matching the previous behavior.
    doc.destroy();
  }
  return ret;
}
// The module's only export is the PDF() function itself.
module.exports = PDF;