Skip to content

Commit 8ca7756

Browse files
authored
tests: remove example.com from tests (#885)
also use local http-server for behavior tests
1 parent a2742df commit 8ca7756

File tree

7 files changed

+38
-23
lines changed

7 files changed

+38
-23
lines changed

tests/basic_crawl.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(...
88

99
test("ensure basic crawl run with docker run passes", async () => {
1010
child_process.execSync(
11-
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix',
11+
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix',
1212
);
1313

1414
child_process.execSync(

tests/custom-behavior.test.js

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
import child_process from "child_process";
22
import Redis from "ioredis";
33

4+
let proc = null;
5+
6+
const DOCKER_HOST_NAME = process.env.DOCKER_HOST_NAME || "host.docker.internal";
7+
const TEST_HOST = `http://${DOCKER_HOST_NAME}:31503`;
8+
9+
beforeAll(() => {
10+
proc = child_process.spawn("../../node_modules/.bin/http-server", ["-p", "31503"], {cwd: "tests/custom-behaviors/"});
11+
});
12+
13+
afterAll(() => {
14+
if (proc) {
15+
proc.kill();
16+
}
17+
});
18+
419

520
async function sleep(time) {
621
await new Promise((resolve) => setTimeout(resolve, time));
@@ -9,7 +24,7 @@ async function sleep(time) {
924

1025
test("test custom behaviors from local filepath", async () => {
1126
const res = child_process.execSync(
12-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
27+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/page --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
1328
);
1429

1530
const log = res.toString();
@@ -21,10 +36,10 @@ test("test custom behaviors from local filepath", async () => {
2136
) > 0,
2237
).toBe(true);
2338

24-
// but not for example.org
39+
// but not for example-com.webrecorder.net
2540
expect(
2641
log.indexOf(
27-
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org","workerid":0}}',
42+
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/page","workerid":0}}',
2843
) > 0,
2944
).toBe(false);
3045

@@ -37,7 +52,7 @@ test("test custom behaviors from local filepath", async () => {
3752
});
3853

3954
test("test custom behavior from URL", async () => {
40-
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page");
55+
const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --scopeType page`);
4156

4257
const log = res.toString();
4358

@@ -51,7 +66,7 @@ test("test custom behavior from URL", async () => {
5166
});
5267

5368
test("test mixed custom behavior sources", async () => {
54-
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page");
69+
const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page`);
5570

5671
const log = res.toString();
5772

@@ -74,7 +89,7 @@ test("test mixed custom behavior sources", async () => {
7489

7590
test("test custom behaviors from git repo", async () => {
7691
const res = child_process.execSync(
77-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
92+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
7893
);
7994

8095
const log = res.toString();
@@ -86,10 +101,10 @@ test("test custom behaviors from git repo", async () => {
86101
) > 0,
87102
).toBe(true);
88103

89-
// but not for example.org
104+
// but not for example-com.webrecorder.net
90105
expect(
91106
log.indexOf(
92-
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}',
107+
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/","workerid":0}}',
93108
) > 0,
94109
).toBe(false);
95110

@@ -106,7 +121,7 @@ test("test invalid behavior exit", async () => {
106121

107122
try {
108123
child_process.execSync(
109-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
124+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
110125
);
111126
} catch (e) {
112127
status = e.status;
@@ -121,7 +136,7 @@ test("test crawl exits if behavior not fetched from url", async () => {
121136

122137
try {
123138
child_process.execSync(
124-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page",
139+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page",
125140
);
126141
} catch (e) {
127142
status = e.status;
@@ -136,7 +151,7 @@ test("test crawl exits if behavior not fetched from git repo", async () => {
136151

137152
try {
138153
child_process.execSync(
139-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page",
154+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page",
140155
);
141156
} catch (e) {
142157
status = e.status;
@@ -151,7 +166,7 @@ test("test crawl exits if not custom behaviors collected from local path", async
151166

152167
try {
153168
child_process.execSync(
154-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page",
169+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors /custom-behaviors/doesntexist --scopeType page",
155170
);
156171
} catch (e) {
157172
status = e.status;
@@ -166,7 +181,7 @@ test("test pushing behavior logs to redis", async () => {
166181

167182
const redisId = child_process.execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis");
168183

169-
const child = child_process.exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis");
184+
const child = child_process.exec(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis`);
170185

171186
let resolve = null;
172187
const crawlFinished = new Promise(r => resolve = r);

tests/custom-behaviors/custom-flow.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"type": "change",
31-
"value": "https://example.com/",
31+
"value": "https://example-com.webrecorder.net/",
3232
"selectors": [
3333
[
3434
"aria/[role=\"main\"]",

tests/custom_selector.test.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ test("test valid autoclick selector passes validation", async () => {
7171

7272
try {
7373
child_process.execSync(
74-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector button --scopeType page",
74+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector button --scopeType page",
7575
);
7676
} catch (e) {
7777
failed = true;
@@ -87,7 +87,7 @@ test("test invalid autoclick selector fails validation, crawl fails", async () =
8787

8888
try {
8989
child_process.execSync(
90-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector \",\" --scopeType page",
90+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector \",\" --scopeType page",
9191
);
9292
} catch (e) {
9393
status = e.status;

tests/exclude-redirected.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { execSync } from "child_process";
66

77
test("ensure exclusion is applied on redirected URL, which contains 'help', so it is not crawled", () => {
88
execSync(
9-
"docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --exclude help --collection redir-exclude-test --extraHops 1");
9+
"docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --exclude help --collection redir-exclude-test --extraHops 1");
1010

1111
// no entries besides header
1212
expect(

tests/invalid-behaviors/invalid-export.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export class TestBehavior {
1010
}
1111

1212
static isMatch() {
13-
return window.location.origin === "https://example.com";
13+
return window.location.origin === "https://example-com.webrecorder.net";
1414
}
1515

1616
async *run(ctx) {

tests/retry-failed.test.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ afterAll(() => {
3838

3939

4040
test("run crawl with retries for no response", async () => {
41-
execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example.com/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`);
41+
execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example-com.webrecorder.net/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`);
4242

4343
const redis = new Redis("redis://127.0.0.1:36387/0", { lazyConnect: true, retryStrategy: () => null });
4444

@@ -90,7 +90,7 @@ test("run crawl with retries for 503, enough retries to succeed", async () => {
9090
requests = 0;
9191
success = false;
9292

93-
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
93+
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
9494

9595
let status = 0;
9696

@@ -117,7 +117,7 @@ test("run crawl with retries for 503, not enough retries, fail", async () => {
117117
requests = 0;
118118
success = false;
119119

120-
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
120+
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
121121

122122
let status = 0;
123123

@@ -143,7 +143,7 @@ test("run crawl with retries for 503, no retries, fail", async () => {
143143
requests = 0;
144144
success = false;
145145

146-
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
146+
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
147147

148148
let status = 0;
149149

0 commit comments

Comments
 (0)