Skip to content

Commit 3a40ab6

Browse files
committed
ensure URLs from autoclick are also treated as links!
update to warcio 2.4.8, bump version to 1.11.2
1 parent 581a703 commit 3a40ab6

File tree

4 files changed

+24
-5
lines changed

4 files changed

+24
-5
lines changed

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "browsertrix-crawler",
3-
"version": "1.11.1",
3+
"version": "1.11.2",
44
"main": "browsertrix-crawler",
55
"type": "module",
66
"repository": "https://github.com/webrecorder/browsertrix-crawler",
@@ -42,7 +42,7 @@
4242
"tsc": "^2.0.4",
4343
"undici": "^7.16.0",
4444
"uuid": "8.3.2",
45-
"warcio": "^2.4.7",
45+
"warcio": "git+https://github.com/webrecorder/warcio.js#multi-value-allow-always",
4646
"ws": "^7.4.4",
4747
"yargs": "^17.7.2"
4848
},

src/crawler.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -973,8 +973,14 @@ self.__bx_behaviors.selectMainBehavior();
973973
}
974974
}
975975

976-
await page.exposeFunction(BxFunctionBindings.AddToSeenSet, (data: string) =>
977-
this.crawlState.addToUserSet(data),
976+
await page.exposeFunction(
977+
BxFunctionBindings.AddToSeenSet,
978+
(data: string) => {
979+
if (data && (data.startsWith("https:") || data.startsWith("http:"))) {
980+
void callbacks.addLink(data);
981+
}
982+
return this.crawlState.addToUserSet(data);
983+
},
978984
);
979985

980986
// eslint-disable-next-line @typescript-eslint/no-explicit-any

src/util/recorder.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1776,7 +1776,7 @@ class AsyncFetcher {
17761776
} catch (e) {
17771777
logger.warn(
17781778
"Async load headers failed",
1779-
{ ...formatErr(e), ...this.recorder.logDetails },
1779+
{ ...formatErr(e), url: this.reqresp.url, ...this.recorder.logDetails },
17801780
"fetch",
17811781
);
17821782
}

yarn.lock

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7167,6 +7167,19 @@ warcio@^2.4.7:
71677167
uuid-random "^1.3.2"
71687168
yargs "^17.7.2"
71697169

7170+
"warcio@git+https://github.com/webrecorder/warcio.js#multi-value-allow-always":
7171+
version "2.4.8"
7172+
resolved "git+https://github.com/webrecorder/warcio.js#07f8e3805766083304f3c02735b735d19c6320cc"
7173+
dependencies:
7174+
"@types/pako" "^1.0.7"
7175+
"@types/stream-buffers" "^3.0.7"
7176+
base32-encode "^2.0.0"
7177+
hash-wasm "^4.9.0"
7178+
pako "^1.0.11"
7179+
tempy "^3.1.0"
7180+
uuid-random "^1.3.2"
7181+
yargs "^17.7.2"
7182+
71707183
71717184
version "0.3.10"
71727185
resolved "https://registry.yarnpkg.com/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.3.10.tgz#437405564ff7e200371468f4f1eba1ff5537e119"

0 commit comments

Comments
 (0)