Skip to content

Commit 7962b25

Browse files
committed
v1.2.7
1 parent 80189e6 commit 7962b25

File tree

9 files changed

+243
-157
lines changed

9 files changed

+243
-157
lines changed

README.md

Lines changed: 38 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ Forwards intercepted requests from the browser to Node.js where it handles the r
55

66
## Features
77

8-
- Proxy per page **and** per request
9-
- Supports **(** http, https, socks4, socks5 **)** proxies
10-
- Authentication
11-
- Cookie handling internally
8+
- Proxy per page and proxy per request
9+
- Supports **http**, **https**, **socks4** and **socks5** proxies
10+
- Supports authentication
11+
- Handles cookies
1212

1313
## Installation
1414
```
@@ -20,9 +20,9 @@ npm i puppeteer-page-proxy
2020
- `pageOrReq` <[object](https://developer.mozilla.org/en-US/docs/Glossary/Object)> 'Page' or 'Request' object to set a proxy for.
2121
- `proxy` <[string](https://developer.mozilla.org/en-US/docs/Glossary/String)|[object](https://developer.mozilla.org/en-US/docs/Glossary/Object)> Proxy to use in the current page.
2222
* Begins with a protocol (e.g. http://, https://, socks://)
23-
* In the case of [proxy per request](https://github.com/Cuadrix/puppeteer-page-proxy#proxy-per-request), this can be an object with optional properites for overriding requests:\
23+
* In the case of [proxy per request](https://github.com/Cuadrix/puppeteer-page-proxy#proxy-per-request), this can be an object with optional properties for overriding requests:\
2424
`url`, `method`, `postData`, `headers`\
25-
See [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestcontinueoverrides) for more info about the above properties.
25+
See [httpRequest.continue](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestcontinueoverrides) for more info about the above properties.
2626

2727
#### PageProxy.lookup(page[, lookupService, isJSON, timeout])
2828

@@ -38,26 +38,14 @@ See [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/a
3838
**NOTE:** By default this method expects a response in [JSON](https://en.wikipedia.org/wiki/JSON#Example) format and parses it with [JSON.parse](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse) into a usable JavaScript object. To disable this functionality, set `isJSON` to `false`.
3939

4040
## Usage
41-
#### Proxy per page:
41+
#### Importing:
4242
```js
43-
const puppeteer = require('puppeteer');
4443
const useProxy = require('puppeteer-page-proxy');
44+
```
4545

46-
(async () => {
47-
const site = 'https://example.com';
48-
const proxy = 'http://host:port';
49-
const proxy2 = 'https://host:port';
50-
51-
const browser = await puppeteer.launch({headless: false});
52-
53-
const page = await browser.newPage();
54-
await useProxy(page, proxy);
55-
await page.goto(site);
56-
57-
const page2 = await browser.newPage();
58-
await useProxy(page2, proxy2);
59-
await page2.goto(site);
60-
})();
46+
#### Proxy per page:
47+
```js
48+
await useProxy(page, 'http://127.0.0.1:80');
6149
```
6250
To remove the proxy, omit it or pass in a falsy value (e.g. `null`):
6351
```js
@@ -66,43 +54,31 @@ await useProxy(page, null);
6654

6755
#### Proxy per request:
6856
```js
69-
const puppeteer = require('puppeteer');
70-
const useProxy = require('puppeteer-page-proxy');
71-
72-
(async () => {
73-
const site = 'https://example.com';
74-
const proxy = 'socks://host:port';
75-
76-
const browser = await puppeteer.launch({headless: false});
77-
const page = await browser.newPage();
78-
79-
await page.setRequestInterception(true);
80-
page.on('request', async req => {
81-
await useProxy(req, proxy);
82-
});
83-
await page.goto(site);
84-
})();
57+
await page.setRequestInterception(true);
58+
page.on('request', async request => {
59+
await useProxy(request, 'https://127.0.0.1:443');
60+
});
8561
```
8662
The request object itself is passed as the first argument. The proxy can now be changed on every request.
8763

8864
Using it along with other interception methods:
8965
```js
9066
await page.setRequestInterception(true);
91-
page.on('request', async req => {
67+
page.on('request', async request => {
9268
if (request.resourceType() === 'image') {
9369
request.abort();
9470
} else {
95-
await useProxy(req, proxy);
71+
await useProxy(request, 'socks4://127.0.0.1:1080');
9672
}
9773
});
9874
```
9975

10076
Overriding requests:
10177
```js
10278
await page.setRequestInterception(true);
103-
page.on('request', async req => {
104-
await useProxy(req, {
105-
proxy: proxy,
79+
page.on('request', async request => {
80+
await useProxy(request, {
81+
proxy: 'socks5://127.0.0.1:1080',
10682
url: 'https://example.com',
10783
method: 'POST',
10884
postData: '404',
@@ -113,40 +89,29 @@ page.on('request', async req => {
11389
});
11490
```
11591

116-
**NOTE:** It is necessary to set [page.setRequestInterception](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#pagesetrequestinterceptionvalue) to true when setting proxies per request, otherwise the function will fail.
92+
**NOTE:** It is necessary to set [page.setRequestInterception](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagesetrequestinterceptionvalue) to true when setting proxies per request, otherwise the function will fail.
11793

118-
#### Authentication:
94+
#### Authenticating:
11995
```js
120-
const proxy = 'https://login:pass@host:port';
96+
const proxy = 'https://user:pass@host:port';
12197
```
12298

123-
#### Lookup IP used by proxy:
99+
#### IP lookup:
124100
```js
125-
const puppeteer = require('puppeteer');
126-
const useProxy = require('puppeteer-page-proxy');
127-
128-
(async () => {
129-
const site = 'https://example.com';
130-
const proxy1 = 'http://host:port';
131-
const proxy2 = 'https://host:port';
132-
133-
const browser = await puppeteer.launch({headless: false});
134-
135-
// 1
136-
const page1 = await browser.newPage();
137-
await useProxy(page1, proxy1);
138-
let data = await useProxy.lookup(page1); // Waits until done, 'then' continues
139-
console.log(data.ip);
140-
await page1.goto(site);
101+
// 1. Waits until done, 'then' continues
102+
const data = await useProxy.lookup(page1);
103+
console.log(data.ip);
141104

142-
// 2
143-
const page2 = await browser.newPage();
144-
await useProxy(page2, proxy2);
145-
useProxy.lookup(page2).then(data => { // Executes and 'comes back' once done
146-
console.log(data.ip);
147-
});
148-
await page2.goto(site);
149-
})();
105+
// 2. Executes and 'comes back' once done
106+
useProxy.lookup(page2).then(data => {
107+
console.log(data.ip);
108+
});
109+
```
110+
In case of any [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) errors, use the `--disable-web-security` launch flag:
111+
```js
112+
const browser = await puppeteer.launch({
113+
args: ['--disable-web-security']
114+
});
150115
```
151116

152117
## FAQ
@@ -156,7 +121,7 @@ It takes over the task of requesting content **from** the browser to do it inter
156121

157122
#### Why am I getting _"Request is already handled!"_?
158123

159-
This happens when there is an attempt to handle the same request more than once. An intercepted request is handled by either [request.abort](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestaborterrorcode), [request.continue](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestcontinueoverrides) or [request.respond](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#requestrespondresponse) methods. Each of these methods 'send' the request to its destination. A request that has already reached its destination cannot be intercepted or handled.
124+
This happens when there is an attempt to handle the same request more than once. An intercepted request is handled by one of the [httpRequest.abort](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestaborterrorcode), [httpRequest.continue](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestcontinueoverrides) or [httpRequest.respond](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#httprequestrespondresponse) methods. Each of these methods 'sends' the request to its destination. A request that has already reached its destination cannot be intercepted or handled.
160125

161126

162127
#### Why does the browser show _"Your connection to this site is not secure"_?

changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# Change log
2+
### [1.2.7] - 2020-06-30
3+
#### Changes
4+
- Reimplement cookie handling to account for deletion and addition of browser cookies
5+
- Changed default lookup fetch source to **api64.ipify.org**
6+
- Update documentation
27
### [1.2.6] - 2020-06-18
38
#### Changes
49
- Updated for Puppeteer's v4.0.0 [breaking changes](https://github.com/puppeteer/puppeteer/releases/tag/v4.0.0) ([#22](https://github.com/Cuadrix/puppeteer-page-proxy/issues/22), [#23](https://github.com/Cuadrix/puppeteer-page-proxy/issues/23))

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "puppeteer-page-proxy",
33
"description": "Additional Node.js module to use with 'puppeteer' for setting proxies per page basis.",
4-
"version": "1.2.6",
4+
"version": "1.2.7",
55
"author": "Cuadrix <[email protected]> (https://github.com/Cuadrix)",
66
"homepage": "https://github.com/Cuadrix/puppeteer-page-proxy",
77
"main": "./src/index.js",
@@ -10,8 +10,8 @@
1010
"test": "echo \"Error: no test specified\" && exit 1"
1111
},
1212
"repository": {
13-
"type" : "git",
14-
"url" : "https://github.com/Cuadrix/puppeteer-page-proxy.git"
13+
"type": "git",
14+
"url": "https://github.com/Cuadrix/puppeteer-page-proxy.git"
1515
},
1616
"keywords": [
1717
"puppeteer",
@@ -28,4 +28,4 @@
2828
"socks-proxy-agent": "^5.0.0",
2929
"tough-cookie": "^4.0.0"
3030
}
31-
}
31+
}

src/core/lookup.js

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,36 @@
1-
const lookup = async (page, lookupService = "https://api.ipify.org?format=json", isJSON = true, timeout = 30000) => {
1+
const lookup = async (page, lookupService = "https://api64.ipify.org?format=json", isJSON = true, timeout = 30000) => {
22
const doLookup = async () => {
33
return await page.evaluate((lookupService, timeout, isJSON) => {
44
return new Promise((resolve) => {
5-
const req = new XMLHttpRequest();
6-
req.timeout = timeout;
7-
req.onload = () => {
8-
if (req.status >= 200 && req.status <= 299) {
9-
resolve(isJSON ? JSON.parse(req.responseText) : req.responseText);
10-
} else {
11-
resolve(onLookupFailed(`Request from ${window.location.href} to ${lookupService} failed with status code ${req.status}`));
12-
}
5+
const request = new XMLHttpRequest();
6+
request.timeout = timeout;
7+
request.onload = () => {
8+
if (request.status >= 200 && request.status <= 299) {
9+
resolve(isJSON ? JSON.parse(request.responseText) : request.responseText);
10+
} else {resolve(onLookupFailed(
11+
`Request from ${window.location.href} to ` +
12+
`${lookupService} failed with status code ${request.status}`
13+
))}
1314
};
14-
req.ontimeout = (error) => {
15-
resolve(onLookupFailed(`Request from ${window.location.href} to ${lookupService} timed out -> ${req.timeout} ms`));
16-
};
17-
req.open("GET", lookupService, true);
18-
req.send();
15+
request.ontimeout = (error) => {resolve(onLookupFailed(
16+
`Request from ${window.location.href} to ` +
17+
`${lookupService} timed out at ${request.timeout} ms`
18+
))};
19+
request.open("GET", lookupService, true);
20+
request.send();
1921
});
2022
}, lookupService, timeout, isJSON);
2123
};
2224
try {
2325
await page.setBypassCSP(true);
24-
const functionName = "onLookupFailed";
26+
const functionName = "$ppp_on_lookup_failed";
2527
if (!page._pageBindings.has(functionName)) {
26-
await page.exposeFunction(functionName, (reason) => {
27-
console.error(reason);
28-
return;
28+
await page.exposeFunction(functionName, (failReason) => {
29+
console.error(failReason); return;
2930
});
3031
}
3132
return await doLookup();
32-
} catch(error) {
33-
if (error.message.startsWith("Execution context was destroyed")) {
34-
return await doLookup();
35-
}
36-
}
33+
} catch(error) {console.error(error)}
3734
};
3835

3936
module.exports = lookup;

src/core/proxy.js

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,41 @@
1-
const request = require("got");
2-
const type = require("../lib/types");
3-
const cookieJar = require("../lib/cookies");
4-
const {setOverrides, setHeaders, setAgent} = require("../lib/options");
1+
const got = require("got");
2+
const CookieHandler = require("../lib/cookies");
3+
const {setHeaders, setAgent} = require("../lib/options");
4+
const type = require("../util/types");
55

66
// Responsible for applying proxy
7-
const proxyHandler = async (req, proxy) => {
7+
const requestHandler = async (request, proxy, overrides = {}) => {
8+
// Reject non http(s) URI schemes
9+
if (!request.url().startsWith("http") && !request.url().startsWith("https")) {
10+
request.continue(); return;
11+
}
12+
const cookieHandler = new CookieHandler(request);
13+
// Request options for Got accounting for overrides
814
const options = {
9-
cookieJar,
10-
method: req.method(),
11-
body: req.postData(),
12-
headers: setHeaders(req),
15+
cookieJar: await cookieHandler.getCookies(),
16+
method: overrides.method || request.method(),
17+
body: overrides.postData || request.postData(),
18+
headers: overrides.headers || setHeaders(request),
1319
agent: setAgent(proxy),
1420
responseType: "buffer",
1521
maxRedirects: 15,
1622
throwHttpErrors: false
1723
};
1824
try {
19-
const res = await request(req.url(), options);
20-
await req.respond({
21-
status: res.statusCode,
22-
headers: res.headers,
23-
body: res.body
25+
const response = await got(overrides.url || request.url(), options);
26+
// Set cookies manually because "set-cookie" doesn't set all cookies (?)
27+
// Perhaps related to https://github.com/puppeteer/puppeteer/issues/5364
28+
const setCookieHeader = response.headers["set-cookie"];
29+
if (setCookieHeader) {
30+
await cookieHandler.setCookies(setCookieHeader);
31+
response.headers["set-cookie"] = undefined;
32+
}
33+
await request.respond({
34+
status: response.statusCode,
35+
headers: response.headers,
36+
body: response.body
2437
});
25-
} catch(error) {await req.abort()}
38+
} catch(error) {await request.abort()}
2639
};
2740

2841
// For reassigning proxy of page
@@ -41,7 +54,7 @@ const removeRequestListener = (page, listenerName) => {
4154
};
4255

4356
// Calls this if request object passed
44-
const proxyPerRequest = async (req, data) => {
57+
const proxyPerRequest = async (request, data) => {
4558
let proxy, overrides;
4659
// Separate proxy and overrides
4760
if (type(data) === "object") {
@@ -51,21 +64,21 @@ const proxyPerRequest = async (req, data) => {
5164
overrides = data;
5265
}
5366
} else {proxy = data}
54-
req = setOverrides(req, overrides);
5567
// Skip request if proxy omitted
56-
if (proxy) {await proxyHandler(req, proxy)}
57-
else {req.continue(overrides)}
68+
if (proxy) {await requestHandler(request, proxy, overrides)}
69+
else {request.continue(overrides)}
5870
};
5971

6072
// Calls this if page object passed
6173
const proxyPerPage = async (page, proxy) => {
6274
await page.setRequestInterception(true);
63-
removeRequestListener(page, "$ppp");
64-
if (proxy) {
65-
page.on("request", $ppp = async (req) => {
66-
await proxyHandler(req, proxy);
67-
});
68-
} else {await page.setRequestInterception(false)}
75+
const listener = "$ppp_request_listener";
76+
removeRequestListener(page, listener);
77+
const f = {[listener]: async (request) => {
78+
await requestHandler(request, proxy);
79+
}};
80+
if (proxy) {page.on("request", f[listener])}
81+
else {await page.setRequestInterception(false)}
6982
};
7083

7184
// Main function
@@ -74,7 +87,7 @@ const useProxy = async (target, data) => {
7487
if (targetType === "HTTPRequest") {
7588
await proxyPerRequest(target, data);
7689
} else if (targetType === "Page") {
77-
await proxyPerPage(target, data)
90+
await proxyPerPage(target, data);
7891
}
7992
};
8093

0 commit comments

Comments
 (0)