Skip to content

Commit a5c7f26

Browse files
committed
feat: reimpl by url and archive downloads
1 parent 5f0da9c commit a5c7f26

File tree

4 files changed

+63
-90
lines changed

4 files changed

+63
-90
lines changed

src/cli.ts

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ import { ItemGroup } from './item/ItemGroup';
2727
import { Item } from './item/Item';
2828
import { TraunerShelf } from './shelf/TraunerShelf';
2929
import retry from 'async-retry';
30+
import { ItemRef } from './item/ItemRef';
3031

3132
const cmd = command({
3233
name: 'd4sd',
@@ -144,7 +145,7 @@ const cmd = command({
144145
let bookUrls: string[] = [];
145146
let bookTitles: string[] = [];
146147
for (const book of args.books) {
147-
if (book.startsWith(shelf.origin)) {
148+
if (shelf.origins.some((o) => book.startsWith(o))) {
148149
bookUrls.push(book);
149150
} else {
150151
bookTitles.push(book);
@@ -153,19 +154,32 @@ const cmd = command({
153154

154155
let itemRefs = bookTitles.length > 0 ? await shelf.getItems() : [];
155156

156-
itemRefs = itemRefs.filter((ref) =>
157-
bookTitles.some(
158-
(title) =>
157+
// filter specified books
158+
itemRefs = itemRefs.filter(
159+
(ref) =>
160+
bookTitles.some((title) =>
159161
minimatch(ref.title, title, {
160162
nocase: true,
161163
dot: true,
162164
noglobstar: true,
163165
nocomment: true,
164-
}) || ref.title.toLowerCase().includes(title.toLowerCase())
165-
)
166+
})
167+
) ||
168+
bookUrls.some(
169+
(url) => url.replace(/\/$/, '') === ref.url.replace(/\/$/, '')
170+
)
166171
);
167172

168-
// todo: by url?
173+
// add the rest of the book urls with the url as title
174+
for (const bookUrl of bookUrls) {
175+
if (
176+
!itemRefs.some(
177+
(ref) => bookUrl.replace(/\/$/, '') === ref.url.replace(/\/$/, '')
178+
)
179+
) {
180+
itemRefs.push(new ItemRef(shelf, bookUrl, bookUrl));
181+
}
182+
}
169183

170184
if (itemRefs.length === 0) {
171185
console.error(`No items matching your rules could be found.`);

src/item/ItemRef.ts

Lines changed: 4 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -9,48 +9,19 @@ import { ScookBook } from './ScookBook';
99
export class ItemRef {
1010
constructor(
1111
public shelf: Shelf,
12-
public selector: string,
12+
public url: string,
1313
public title: string
1414
) {}
1515

1616
async resolve(): Promise<Item | null> {
17-
const context = this.shelf.browser.defaultBrowserContext();
1817
const page = await this.shelf.browser.newPage();
19-
2018
try {
21-
await page.goto(this.shelf.origin, {
19+
await page.goto(this.url, {
2220
waitUntil: 'load',
2321
timeout: this.shelf.options.timeout,
2422
});
2523

26-
await page.waitForSelector(this.selector, {
27-
timeout: this.shelf.options.timeout,
28-
});
29-
30-
const newPagePromise = new Promise<import('puppeteer').Page>(
31-
(resolve) => {
32-
this.shelf.browser.once('targetcreated', async (target) => {
33-
if (target.type() === 'page') {
34-
const newPage = await target.page();
35-
if (newPage) {
36-
await newPage.bringToFront();
37-
resolve(newPage);
38-
}
39-
}
40-
});
41-
}
42-
);
43-
44-
await page.click(this.selector);
45-
const newPage: any = await newPagePromise;
46-
47-
await newPage.waitForLoadState?.('load').catch(() => {});
48-
49-
await newPage
50-
.waitForNavigation({ waitUntil: 'load', timeout: 3000 })
51-
.catch(() => {});
52-
53-
const pageUrl = newPage.url();
24+
const pageUrl = page.url();
5425

5526
if (pageUrl.includes('scook.at')) {
5627
return new ScookBook(this.shelf, pageUrl, this.title);
@@ -64,19 +35,17 @@ export class ItemRef {
6435
return new OebvBook(this.shelf, pageUrl, this.title);
6536
}
6637

67-
if ((await newPage.$('#loadPage')) != null) {
38+
if ((await page.$('#loadPage')) != null) {
6839
return new DigiBook(this.shelf, pageUrl, this.title);
6940
}
7041

7142
if (
72-
//TODO: NOT WORKING
7343
await page.$$eval('script', (scripts) =>
7444
scripts.some((script) =>
7545
(script as HTMLScriptElement).src.includes('/ce.js')
7646
)
7747
)
7848
) {
79-
console.log('Archive detected');
8049
return new Archive(this.shelf, pageUrl, this.title);
8150
}
8251

src/shelf/DigiShelf.ts

Lines changed: 25 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@ export class DigiShelf extends Shelf {
66
static id = 'digi';
77

88
private constructor() {
9-
super('https://digi4school.at/books');
9+
super('https://digi4school.at/');
10+
this.origins.push('https://a.digi4school.at/');
1011
}
1112

1213
static async load(options: InitOptions) {
@@ -15,7 +16,7 @@ export class DigiShelf extends Shelf {
1516

1617
protected async login() {
1718
await this.formLogin(
18-
'/',
19+
'/login',
1920
'#ion-input-0',
2021
'#ion-input-1',
2122
'ion-button[color="primary"]',
@@ -25,59 +26,39 @@ export class DigiShelf extends Shelf {
2526
.waitForNavigation({ timeout: this.options.timeout })
2627
.then(() => true),
2728
page
28-
.waitForFunction(
29-
() =>
30-
document
31-
.querySelector('div[role="dialog"]')
32-
?.innerHTML.includes('Problem'),
33-
{ timeout: this.options.timeout }
34-
)
29+
.waitForFunction(() => !!document.querySelector('ion-alert'), {
30+
timeout: this.options.timeout,
31+
})
3532
.then(() => false),
3633
])
3734
);
3835
}
3936

4037
async getItems(): Promise<ItemRef[]> {
38+
let books: { code: number; title: string }[];
39+
4140
const page = await this.browser.newPage();
4241
try {
43-
await page.goto(new URL('/books', this.origin).toString());
44-
await page.waitForSelector('#ebooksGrid > ion-row > ion-col', {
45-
timeout: this.options.timeout,
46-
});
47-
48-
const itemLinks = await page.$$('#ebooksGrid > ion-row > ion-col');
49-
50-
return await Promise.all(
51-
itemLinks.map(async (itemLink) => {
52-
const tempSelector = await itemLink.$('ion-thumbnail');
53-
54-
const uniqueSelector = await page.evaluate((el) => {
55-
let path = '';
56-
while (el.parentElement) {
57-
const tag = el.tagName.toLowerCase();
58-
const siblings = Array.from(el.parentElement.children);
59-
const index = siblings.indexOf(el) + 1;
60-
path = ` > ${tag}:nth-child(${index})` + path;
61-
el = el.parentElement;
62-
}
63-
return path.slice(3);
64-
}, itemLink);
65-
66-
const title = await tempSelector?.evaluate((el) =>
67-
el.getAttribute('title')
68-
);
69-
70-
if (!title) {
71-
throw new ScrapeError(
72-
`Could not find the title of item with url ${'test'}.`
73-
);
74-
}
75-
76-
return new ItemRef(this, uniqueSelector, title);
77-
})
42+
const res = await page.goto(
43+
new URL('/br/xhr/v2/synch', this.origin).toString(),
44+
{ waitUntil: 'domcontentloaded' }
7845
);
46+
if (!res || !res.ok || res.status() === 260) {
47+
throw new ScrapeError('Could to retrieve list of books from the API.');
48+
}
49+
const data = await res.json();
50+
books = data.books;
7951
} finally {
8052
await page.close();
8153
}
54+
55+
return books.map(
56+
(book) =>
57+
new ItemRef(
58+
this,
59+
new URL(`/ebook/${book.code}`, this.origin).toString(),
60+
book.title
61+
)
62+
);
8263
}
8364
}

src/shelf/Shelf.ts

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,11 @@ export abstract class Shelf {
1414
options!: Options;
1515
browser!: puppeteer.Browser;
1616

17-
protected constructor(public origin: string) {}
17+
public origins: string[];
18+
19+
protected constructor(public origin: string) {
20+
this.origins = [origin];
21+
}
1822

1923
protected async init(options: InitOptions) {
2024
this.options = {
@@ -45,9 +49,14 @@ export abstract class Shelf {
4549
const page = await this.browser.newPage();
4650
try {
4751
await page.goto(new URL(url, this.origin).toString());
48-
await page.waitForSelector(loginBtnSelector, {
49-
timeout: this.options.timeout,
50-
});
52+
await page.waitForFunction(
53+
(selectors: string[]) =>
54+
selectors.every((s) => document.querySelector(s) !== null),
55+
{
56+
timeout: this.options.timeout,
57+
},
58+
[userSelector, passwordSelector, loginBtnSelector]
59+
);
5160

5261
await page.type(userSelector, this.options.user);
5362
await page.type(passwordSelector, this.options.password);

0 commit comments

Comments
 (0)