-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathisbnde.js
More file actions
170 lines (145 loc) · 4.97 KB
/
isbnde.js
File metadata and controls
170 lines (145 loc) · 4.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import { Extractor } from './AbstractExtractor.js';
import { addContributor, getCoverData, logMarian, remapKeys, cleanText, normalizeReadingFormat, collectObject } from '../shared/utils.js';
/**
 * Extractor for product pages on isbn.de.
 *
 * The site pattern accepts `buch`, `ebook` and `hoerbuch` URLs and captures
 * the product type and a 10- or 13-character ISBN from the path.
 */
class isbndeScraper extends Extractor {
  get _name() {
    return "ISBN.de Extractor";
  }

  _sitePatterns = [
    /https:\/\/(?:www\.)?isbn\.de\/(buch|ebook|hoerbuch)\/((?:\d{3})?\d{9}(?:X|\d))\b/,
  ];

  /**
   * Gather title, description, info-table fields and cover data from the
   * current document and merge them via `collectObject`.
   *
   * @returns {Promise<object>} whatever `collectObject` produces for the collected parts
   */
  async getDetails() {
    // getCover() is async; its promise is handed to collectObject un-awaited.
    // NOTE(review): presumably collectObject resolves promise entries -- confirm in shared/utils.js
    const coverData = getCover();

    const bookDetails = {
      "Title": getTitle(),
      "Description": getDescription() || "",
    };
    const details = extractTable();

    return collectObject([bookDetails, details, coverData]);
  }
}
/**
 * German info-table labels and the field names they are remapped to.
 */
const remapings = {
  "Auflage": "Edition Information",
  "Autor": "Author",
  "Verlag": "Publisher",
  "Rubrik": "Category",
};

/**
 * `remapKeys` with the label mapping above pre-applied as its first argument;
 * any further arguments are forwarded unchanged.
 */
const nameRemap = (...args) => remapKeys(remapings, ...args);
/**
 * Read the book title from the `.isbnhead` header.
 *
 * The `h1` text is the main title. The `h2` text is appended as a subtitle
 * unless it is empty or contains the site's "kein Untertitel" placeholder
 * (compared case-insensitively).
 *
 * A missing header container no longer throws: both lookups are optional, so
 * the function degrades to whatever `cleanText` yields for missing text.
 *
 * @returns {string} the cleaned title, `title: subtitle` when a real subtitle
 *   exists, or just the subtitle when the main title is empty
 */
function getTitle() {
  const container = document.querySelector(".isbnhead");
  const title = cleanText(container?.querySelector("h1")?.textContent);
  const subtitle = cleanText(container?.querySelector("h2")?.textContent);

  const placeholder = "kein Untertitel".toLowerCase();
  const hasSubtitle = Boolean(subtitle) && !subtitle.toLowerCase().includes(placeholder);
  if (!hasSubtitle) return title;

  // Avoid a dangling ": subtitle" when the main title is missing.
  return title ? `${title}: ${subtitle}` : subtitle;
}
/**
 * Read the rendered text of the `#bookdesc` element.
 *
 * @returns {string|null} the element's `innerText`, or null when the element
 *   is absent or its text is empty
 */
function getDescription() {
  const text = document.querySelector("#bookdesc")?.innerText;
  return text ? text : null;
}
/**
 * Look up the cover image and pass its URL, plus a large-size variant, to
 * `getCoverData`.
 *
 * The large variant is derived by replacing the first occurrence of "cover"
 * in the URL with "gross". When no image (or no `src`) is found the candidates
 * are `null` and `undefined`.
 *
 * @returns {Promise<*>} resolves to whatever `getCoverData` returns
 */
async function getCover() {
  const image = document.querySelector("img#ISBNcover");
  /** @type {string|null} */
  const coverUrl = image?.src || null;
  // get large cover
  const largeUrl = coverUrl === null ? undefined : coverUrl.replace("cover", "gross");
  return getCoverData([coverUrl, largeUrl]);
}
/**
 * Parse a day-first date by hand instead of relying on the built-in `Date`
 * string parser, so the result does not depend on how the runtime reads
 * non-ISO date strings.
 *
 * @param {string} date A date string written as DD.MM.YYYY
 * @returns {Date} A js date object at local midnight of that day; an Invalid
 *   Date when any component is missing or not numeric
 */
function parseDate(date) {
  const fields = date.split('.');
  const numberAt = (index) => parseInt(fields[index], 10);
  // The Date constructor counts months from 0, hence the -1.
  return new Date(numberAt(2), numberAt(1) - 1, numberAt(0));
}
/**
 * Convert the text of a date cell to an ISO-8601 timestamp.
 *
 * Tries the day-first `parseDate` first and then the built-in `Date` string
 * parser. Returns null when neither yields a valid date, because calling
 * `toISOString()` on an Invalid Date throws a RangeError.
 *
 * @param {string} text Raw date text
 * @returns {string|null} ISO timestamp, or null when the text cannot be parsed
 */
function toIsoDate(text) {
  const candidates = [parseDate(text), new Date(text)];
  const valid = candidates.find((candidate) => !Number.isNaN(candidate.getTime()));
  return valid ? valid.toISOString() : null;
}

/**
 * Read the `div.infotab` info table into a flat object.
 *
 * Each child node of the container is treated as one row: its first child
 * holds the (German) label and the following children hold the value. Known
 * labels are translated into dedicated fields (reading/edition format, pages,
 * publication date, ISBN-13, contributors, series); every other well-formed
 * two-cell row is copied as `label -> value`. The result is passed through
 * `nameRemap` before being returned.
 *
 * Malformed input is logged and skipped instead of aborting the extraction:
 * a missing container, child nodes without children (e.g. whitespace text
 * nodes between rows), rows without a value cell and unparseable dates.
 *
 * @returns {object} the result of `nameRemap` applied to the collected fields
 */
function extractTable() {
  const table = {};
  const container = document.querySelector("div.infotab");
  if (!container) {
    logMarian("info table not found", "div.infotab");
    return nameRemap(table);
  }

  for (const el of container.childNodes) {
    const children = el.childNodes;
    // Text and comment nodes between rows have no children -- nothing to read.
    if (!children || children.length === 0) continue;

    const title = cleanText(children[0].textContent);
    const valueNode = children[1];
    const valueText = valueNode?.textContent?.trim() ?? "";

    // exceptions
    if (title.includes("Einband")) continue; // part of paperback -- skip
    if (title.includes("Digitalprodukt")) continue; // part of ebook -- skip
    if (title === "Audio CD") continue; // part of audiobook -- skip
    if (title === "Abmessungen") continue; // dimensions -- skip

    if (title === "Buch" || title === "Softcover" || title.includes("eBook") || title === "Audio-CD") {
      let cover = "";
      let format = "Physical Book";
      if (title === "Buch") cover = "Hardcover";
      else if (title === "Softcover") cover = "Paperback";
      else if (title === "Audio-CD") {
        format = "Audiobook";
        cover = "Audio CD";
      }
      else {
        format = "Ebook";
        // eBook labels carry the file format after a comma
        cover = title.split(",")[1]?.trim();
      }
      table["Reading Format"] = normalizeReadingFormat(format);
      table["Edition Format"] = cover;

      const pages = valueNode ? cleanText(valueNode.textContent) : "";
      if (!pages.includes("Seiten")) {
        logMarian("Invalid pages", pages);
      } else {
        table["Pages"] = pages.split(' ')[0].trim();
      }
      continue;
    }

    if (title === "erschienen am" || title === "Erscheinungsjahr" || title === "erschienen im") {
      if (!valueText) {
        logMarian('invalid row', el.textContent);
        continue;
      }
      // A bare year is pinned to January 1st so it parses as a full date.
      const dateText = title === "Erscheinungsjahr" ? `1.1.${valueText}` : valueText;
      const isoDate = toIsoDate(dateText);
      if (isoDate === null) {
        logMarian("Invalid publication date", dateText);
      } else {
        table["Publication date"] = isoDate;
      }
      continue;
    }

    if (title === "ISBN-13") { // because of the fancy link
      let isbn13 = "";
      for (let i = 1; i < children.length; i++) {
        isbn13 = isbn13 + children[i].textContent.trim();
      }
      table[title] = isbn13;
      continue;
    }

    if (title === "Autor") {
      if (!valueText) {
        logMarian('invalid row', el.textContent);
        continue;
      }
      table["Contributors"] = addContributor([], valueText, "Author");
      continue;
    }

    if (title === "Autoren") {
      const contributors = [];
      // Each author is its own link; the separators between them are ignored.
      for (const node of children) {
        if (node.nodeName !== "A") continue;
        addContributor(contributors, node.textContent.trim(), "Author");
      }
      table["Contributors"] = contributors;
      continue;
    }

    if (title === "Reihe") {
      if (!valueText) {
        logMarian('invalid row', el.textContent);
        continue;
      }
      table["Series"] = valueText;
      continue;
    }

    // rest of table
    if (children.length !== 2 || children[0].nodeName !== 'DIV') {
      logMarian('invalid row', el.textContent);
      continue;
    }
    const key = children[0].textContent?.trim();
    const value = children[1].textContent?.trim();
    if (!key) {
      logMarian("empty key", el.textContent);
      continue;
    }
    table[key] = value;
  }

  return nameRemap(table);
}
export { isbndeScraper };