Skip to content

Commit 57b8f49

Browse files
authored
Merge pull request #50 from RobBrazier/add-libro-fm
Add libro.fm extractor
2 parents dee51eb + ef96336 commit 57b8f49

File tree

6 files changed

+186
-8
lines changed

6 files changed

+186
-8
lines changed

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/content.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { getGoodreadsDetails } from './extractors/goodreads.js';
33
import { getStoryGraphDetails } from './extractors/storygraph.js';
44
import { getGoogleBooksDetails } from './extractors/googlebooks.js';
55
import { getKoboDetails } from './extractors/kobo.js';
6+
import { getLibroDetails } from './extractors/librofm.js';
67
import { logMarian } from './shared/utils.js';
78

89

@@ -15,6 +16,7 @@ async function getDetails() {
1516
if (url.includes('isbnsearch.org')) return getIsbnSearchDetails();
1617
if (url.includes('google')) return await getGoogleBooksDetails();
1718
if (url.includes('kobo')) return await getKoboDetails();
19+
if (url.includes('libro.fm')) return await getLibroDetails();
1820
return {};
1921
}
2022

@@ -40,4 +42,4 @@ chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => {
4042
}
4143
});
4244

43-
console.log('[👩🏻‍🏫 Marian] content.js loaded');
45+
console.log('[👩🏻‍🏫 Marian] content.js loaded');

src/extractors/kobo.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,9 @@ function getKoboFormatInfo(bookDetails, url) {
116116
let hours = Math.floor(audioLength);
117117
let mins = Math.round(((audioLength - hours) * 60));
118118
if (audioLength < 1) {
119-
bookDetails['Listening Length'] = mins + " minutes";
119+
bookDetails['Listening Length'] = [mins + " minutes"];
120120
} else {
121-
bookDetails['Listening Length'] = hours + " hours " + mins + " minutes";
121+
bookDetails['Listening Length'] = [hours + " hours ", mins + " minutes"];
122122
}
123123
} else {
124124
bookDetails['Reading Format'] = 'Ebook';
@@ -178,4 +178,4 @@ function extraKoboInfo(bookDetails) {
178178
}
179179
}
180180

181-
export { getKoboDetails };
181+
export { getKoboDetails };

src/extractors/librofm.js

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
import { getImageScore, logMarian, delay } from "../shared/utils.js";
2+
3+
/**
 * Entry point of the libro.fm extractor.
 *
 * Reads the cover image, then runs each field extractor in turn against the
 * current page, letting every one of them add its keys to a shared object.
 *
 * @returns {Promise<Object>} a shallow copy of the collected details
 */
async function getLibroDetails() {
  logMarian("Extracting Libro details");
  const bookDetails = {};

  // Cover image and its score; the score is 0 when no cover URL is found.
  const coverSrc = document.querySelector('.audiobook-cover .book-cover-wrap img.book-cover')?.src;
  bookDetails["img"] = coverSrc;
  bookDetails["imgScore"] = coverSrc ? await getImageScore(coverSrc) : 0;

  // Each extractor below mutates bookDetails in place.
  getLibroBookTitle(bookDetails);          // title
  getLibroSeries(bookDetails);             // series name and number
  extractLibroContributors(bookDetails);   // authors and narrators
  getLibroFormatInfo(bookDetails, window.location.href); // format and length
  extraLibroInfo(bookDetails);             // isbn, language, etc.
  extractLibroDescription(bookDetails);    // description

  logMarian("Libro extraction complete:", bookDetails);
  return { ...bookDetails };
}
35+
36+
/**
 * Reads authors and narrators from the "audiobook details" section and stores
 * them on `bookDetails["Contributors"]` as `{ name, roles }` entries, one per
 * distinct name, in first-seen order.
 *
 * Nothing is written when the section is missing or lists no contributors.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function extractLibroContributors(bookDetails) {
  const section = extractSection('audiobook details');
  if (!section) {
    return;
  }

  // A Map (rather than a plain object probed with `in`) keeps names such as
  // "constructor" from colliding with inherited Object.prototype members.
  const rolesByName = new Map();
  const addRole = (link, role) => {
    const name = link.textContent.trim();
    if (!name) {
      return;
    }
    const roles = rolesByName.get(name) ?? [];
    // The same link can appear more than once; record each role only once.
    if (!roles.includes(role)) {
      roles.push(role);
    }
    rolesByName.set(name, roles);
  };

  for (const author of section.querySelectorAll('span[itemprop="author"] a')) {
    addRole(author, "Author");
  }
  for (const narrator of section.querySelectorAll('a[href$="searchby=narrators"]')) {
    addRole(narrator, "Narrator");
  }

  const contributors = Array.from(rolesByName, ([name, roles]) => ({ name, roles }));
  if (contributors.length) {
    bookDetails["Contributors"] = contributors;
  }
}
66+
67+
/**
 * Stores the series name and, when present, the book's position in it.
 *
 * The name comes from the link inside `.audiobook-title__series`; the position
 * is the first number found in that element's own text nodes.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function getLibroSeries(bookDetails) {
  const seriesLink = document.querySelector('.audiobook-title__series a');
  if (!seriesLink) {
    return;
  }
  bookDetails['Series'] = seriesLink.textContent.trim();

  const placeText = extractTextNode(document.querySelector('.audiobook-title__series'));
  // Accept an optional fractional part so a position like "2.5" is not cut to "2".
  const place = placeText.match(/\d+(?:\.\d+)?/);
  if (place) {
    bookDetails['Series Place'] = place[0];
  }
}
79+
80+
/**
 * Stores the book title, taken from the first child node of
 * `h1.audiobook-title`, with surrounding whitespace removed.
 *
 * Nothing is written when the heading is missing or that text is empty.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function getLibroBookTitle(bookDetails) {
  const heading = document.querySelector('h1.audiobook-title');
  const title = heading?.childNodes[0]?.textContent.trim();
  if (title) {
    bookDetails["Title"] = title;
  }
}
85+
86+
/**
 * Flattens the rendered text of several elements into newline-separated lines.
 *
 * @param {Iterable<Element>|ArrayLike<Element>} elements - elements to read
 * @returns {string} every non-blank line, trimmed, joined with "\n"
 */
function joinContent(elements) {
  return Array.from(elements)
    // libro.fm uses <br> tags instead of <p> tags for paragraphs, so innerText
    // is preferred; textContent is only a fallback when innerText is undefined
    .map((item) => item.innerText ?? item.textContent ?? '')
    // split by newlines so that everything isn't on one line
    .flatMap((text) => text.split(/\r?\n/))
    // trim per line so whitespace-only lines count as empty below
    .map((line) => line.trim())
    // strip out empty lines (there are some random empty <p> tags)
    .filter((line) => line.length > 0)
    .join("\n");
}
96+
97+
/**
 * Returns the text held directly by an element, ignoring child elements.
 *
 * Only the element's own text nodes are read. Each is trimmed, whitespace-only
 * nodes are dropped, and the rest are joined with "\n".
 *
 * @param {Element|null|undefined} element - element to read; may be absent
 * @returns {string} the joined text, or "" when there is none
 */
function extractTextNode(element) {
  return Array.from(element?.childNodes || [])
    .filter((node) => node.nodeType === Node.TEXT_NODE)
    .map((node) => node.textContent.trim())
    // whitespace between child elements would otherwise leave blank lines
    .filter((text) => text.length > 0)
    .join("\n");
}
104+
105+
/**
 * Finds the first `<section>` on the page whose first `<h2>` matches `title`.
 *
 * The comparison ignores case and surrounding whitespace on both sides.
 *
 * @param {string} title - heading text to look for
 * @returns {Element|undefined} the matching section, or undefined when none matches
 */
function extractSection(title) {
  const wanted = String(title).trim().toLowerCase();
  return Array.from(document.querySelectorAll('section')).find(
    (section) => section.querySelector('h2')?.textContent.trim().toLowerCase() === wanted,
  );
}
110+
111+
/**
 * Marks the book as an audiobook and, when the page lists a length, stores it
 * as an array of parts (for example ["10 hours", "5 minutes"]).
 *
 * 'Listening Length' is left unset when no "Length" row with text is found.
 * Extra arguments passed by callers are ignored.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function getLibroFormatInfo(bookDetails) {
  bookDetails['Reading Format'] = 'Audiobook';

  const informationSections = document.querySelectorAll(".audiobook-information .audiobook-information__section");
  const lengthSection = Array.from(informationSections).find(
    (section) => section.querySelector("strong")?.textContent.trim().toLowerCase() === 'length',
  );

  const audioLength = extractTextNode(lengthSection);
  if (!audioLength) {
    // Splitting "" would yield [""], which is not a usable length.
    return;
  }

  // split the length by number boundary: any whitespace run followed by a digit
  bookDetails['Listening Length'] = audioLength.split(/\s+(?=\d)/);
}
126+
127+
/**
 * Stores the book summary as `bookDetails["Description"]`.
 *
 * The text is read from the `<p>` elements of the "summary" section or, when
 * that section is absent, of the `#panel_summary` element. Nothing is written
 * when neither container exists or no paragraph text is found.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function extractLibroDescription(bookDetails) {
  // if there is a tab for more information about the authors, the summary
  // lives in a different element, so fall back to that one
  const container = extractSection('summary') || document.querySelector('#panel_summary');
  if (!container) {
    return;
  }

  const summary = joinContent(container.querySelectorAll('p'));
  if (summary) {
    bookDetails["Description"] = summary;
  }
}
137+
138+
139+
/**
 * Copies publisher, publication date, language, ISBN and edition format from
 * the "audiobook details" section into `bookDetails`.
 *
 * Each field is written only when its element is present. Nothing is written
 * when the section itself is missing.
 *
 * @param {Object} bookDetails - details object, mutated in place
 */
function extraLibroInfo(bookDetails) {
  const section = extractSection('audiobook details');
  if (!section) {
    return;
  }

  // itemprop attribute -> key under which its trimmed text is stored
  const simpleFields = [
    ['publisher', 'Publisher'],
    ['datePublished', 'Publication date'],
    ['inLanguage', 'Language'],
  ];
  for (const [itemprop, key] of simpleFields) {
    const element = section.querySelector(`span[itemprop="${itemprop}"]`);
    if (element) {
      bookDetails[key] = element.textContent.trim();
    }
  }

  const isbn = section.querySelector('span[itemprop="isbn"]');
  if (isbn) {
    const isbnText = isbn.textContent.trim();
    // The length of the text (not of the element) decides which key is used.
    if (isbnText.length === 13) {
      bookDetails['ISBN-13'] = isbnText;
    } else if (isbnText.length === 10) {
      bookDetails['ISBN-10'] = isbnText;
    }
  }

  // no nice itemprop attribute for edition type :(
  // try to find the relevant cell with the 'Edition' header
  const editionCell = Array.from(section.querySelectorAll('.cell')).find(
    (cell) => cell.querySelector('strong')?.textContent.trim().toLowerCase() === 'edition',
  );
  const editionFormat = editionCell?.querySelector('span')?.textContent.trim();
  if (editionFormat !== undefined) {
    bookDetails['Edition Format'] = editionFormat;
  }
}
173+
174+
export { getLibroDetails };

src/manifest.base.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@
7070
"https://www.goodreads.com/book/show/*",
7171
"https://app.thestorygraph.com/books/*",
7272
"https://www.google.com/books/*",
73-
"https://www.kobo.com/*/*/*book/*"
73+
"https://www.kobo.com/*/*/*book/*",
74+
"https://libro.fm/audiobooks/*"
7475
],
7576
"js": [
7677
"content.js"

src/shared/allowed-patterns.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ const ALLOWED_PATTERNS = [
66
/https:\/\/www\.goodreads\.[a-z.]+\/book\/show\/\d+(-[a-zA-Z0-9-]+)?/,
77
/^https:\/\/app\.thestorygraph\.[a-z.]+\/books\/[0-9a-fA-F-]+$/,
88
/^https?:\/\/(www\.)?google\.[a-z.]+\/books/,
9-
/^https?:\/\/(www\.)?kobo\.[a-z]{2,10}\/[a-z]{2,5}\/[a-z]{2,5}\/[a-z]{1,5}book\/[0-9a-z\-]+/
9+
/^https?:\/\/(www\.)?kobo\.[a-z]{2,10}\/[a-z]{2,5}\/[a-z]{2,5}\/[a-z]{1,5}book\/[0-9a-z\-]+/,
10+
/^https?:\/\/(www\.)?libro\.fm\/audiobooks\/\d+(-[a-zA-Z0-9-]+)?/
1011
];
1112

1213
export function isAllowedUrl(url) {

0 commit comments

Comments
 (0)