Skip to content

Commit 41361de

Browse files
authored
Merge pull request #56 from ShilongLee/proxy
Proxy
2 parents 7b580c4 + 75eb589 commit 41361de

File tree

16 files changed

+391
-4
lines changed

16 files changed

+391
-4
lines changed

data/driver.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,104 @@ async def expire(self, id: str) -> bool:
8787
logger.error(f'failed to save cookies, error: {e}')
8888
await conn.rollback()
8989
return False
90+
91+
class Proxies(SqliteStore):
    """SQLite-backed store for proxy records.

    Each row holds a proxy ``url``, an ``enable`` flag (1 = enabled,
    0 = disabled) and the creation / last-update timestamps ``ct`` / ``ut``
    as whole seconds since the epoch.
    """

    def __init__(self, store_path):
        """Initialise the base store with *store_path* and ensure the table exists."""
        super().__init__(store_path)
        self.primary_key = 'id'
        self.table_name = 'proxies'
        self._create_table()

    def _create_table(self):
        """Create the proxies table if it does not exist yet.

        Failures are logged and not re-raised.
        """
        with closing(self._get_sync_connection()) as conn, closing(conn.cursor()) as cursor:
            try:
                sql = f'''
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    {self.primary_key} INTEGER PRIMARY KEY AUTOINCREMENT,
                    url VARCHAR(512) NOT NULL,
                    enable INTEGER NOT NULL,
                    ct INTEGER NOT NULL,
                    ut INTEGER NOT NULL
                )
                '''
                cursor.execute(sql)
                conn.commit()
            except Exception as e:
                logger.error(f'failed to create table, error: {e}')

    async def save(self, url: str = '', enable: int = 1, id: int = 0) -> bool:
        """Update the proxy identified by *id*, or insert a new one.

        An UPDATE is attempted first; when it matches no row (for instance
        with the default ``id=0``) a new row is inserted instead and the
        database assigns its id.

        Args:
            url: Proxy URL to store.
            enable: 1 to store the proxy as enabled, 0 as disabled.
            id: Primary key of the row to update.

        Returns:
            True when the change was committed, False when an error occurred
            (the error is logged and the transaction rolled back).
        """
        ct = ut = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = (
                    f'UPDATE {self.table_name} SET url = ?, enable = ?, ut = ? '
                    f'WHERE {self.primary_key} = ?'
                )
                cursor = await conn.execute(sql, (url, enable, ut, id))
                # Use the statement's own rowcount: conn.total_changes counts
                # every change made since the connection was opened, so it only
                # answers "did this UPDATE match?" on a brand-new connection.
                if cursor.rowcount < 1:
                    sql = f'INSERT INTO {self.table_name} (url, enable, ct, ut) VALUES (?, ?, ?, ?)'
                    await conn.execute(sql, (url, enable, ct, ut))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to save proxy, error: {e}')
                await conn.rollback()
                return False

    async def remove(self, id: int) -> bool:
        """Delete the proxy identified by *id*.

        Returns:
            True when the DELETE was committed (even if no row matched),
            False when an error occurred.
        """
        async with self._get_connection() as conn:
            try:
                sql = f'DELETE FROM {self.table_name} WHERE {self.primary_key} = ?'
                await conn.execute(sql, (id,))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to remove proxy, error: {e}')
                await conn.rollback()
                return False

    async def load(self, enable: int = -1, offset: int = 0, limit: int = 0) -> list:
        """Return proxy rows as a list of dicts, ordered by primary key.

        Args:
            enable: Only return rows with this ``enable`` value; -1 (the
                default) disables the filter.
            offset: Number of leading rows to skip.
            limit: Maximum number of rows to return; values <= 0 mean
                "no limit".

        Returns:
            A list with one dict per row, or an empty list when nothing
            matches or an error occurred (errors are logged).
        """
        async with self._get_connection() as conn:
            try:
                sql = f'SELECT * FROM {self.table_name}'
                params = []

                if enable != -1:
                    sql += ' WHERE enable = ?'
                    params.append(enable)

                # A stable order keeps pagination and index-based rotation
                # over the result deterministic.
                sql += f' ORDER BY {self.primary_key}'

                if limit > 0 or offset > 0:
                    # SQLite only accepts OFFSET together with LIMIT; a
                    # negative LIMIT means "no upper bound".
                    sql += ' LIMIT ? OFFSET ?'
                    params.extend([limit if limit > 0 else -1, offset])

                cursor = await conn.execute(sql, params)
                rows = await cursor.fetchall()
                # NOTE(review): dict(row) assumes the connection's row factory
                # yields mapping-like rows -- confirm in SqliteStore.
                return [dict(row) for row in rows]
            except Exception as e:
                logger.error(f'failed to load proxies, error: {e}')
                await conn.rollback()
                return []

    async def _set_enable(self, id: int, flag: int, action: str) -> bool:
        """Set the ``enable`` column of row *id* to *flag* and refresh ``ut``.

        *action* ('enable' or 'disable') is only used in the error log line.
        """
        ut = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = (
                    f'UPDATE {self.table_name} SET enable = ?, ut = ? '
                    f'WHERE {self.primary_key} = ?'
                )
                await conn.execute(sql, (flag, ut, id))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to {action} proxy, error: {e}')
                await conn.rollback()
                return False

    async def enable(self, id: int) -> bool:
        """Mark the proxy identified by *id* as enabled (``enable = 1``).

        Returns:
            True when the UPDATE was committed (even if no row matched),
            False when an error occurred.
        """
        return await self._set_enable(id, 1, 'enable')

    async def disable(self, id: int) -> bool:
        """Mark the proxy identified by *id* as disabled (``enable = 0``).

        Returns:
            True when the UPDATE was committed (even if no row matched),
            False when an error occurred.
        """
        return await self._set_enable(id, 0, 'disable')

data/proxies/.gitkeep

Whitespace-only changes.

docs/api/proxies/proxies.md

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# API 文档
2+
3+
## 代理
4+
5+
以下POST请求参数均使用raw json格式,例如添加代理接口参数为:`{"urls": ["http://example.com:1234", "http://example2.com:2345"] }`
6+
7+
### 添加代理
8+
9+
- **功能说明**
10+
11+
添加完成后,默认为启用状态(`enable = 1`)。
12+
13+
- **URL**
14+
15+
`/proxies/add`
16+
17+
- **Method**
18+
19+
`POST`
20+
21+
- **Data Params**
22+
23+
| 参数 | 必选 | 类型 | 说明 |
24+
|:---:|:---:|:---:|:---:|
25+
| urls | true | [string] | url列表 |
26+
27+
- **Response**
28+
29+
| 参数 | 必选 | 类型 | 说明 |
30+
|:---:|:---:|:---:|:---:|
31+
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
32+
| data | false | null | 数据 |
33+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
34+
35+
### 移除代理
36+
37+
- **URL**
38+
39+
`/proxies/remove`
40+
41+
- **Method**
42+
43+
`POST`
44+
45+
- **Data Params**
46+
47+
| 参数 | 必选 | 类型 | 说明 |
48+
|:---:|:---:|:---:|:---:|
49+
| ids | true | [int] | 需要移除的代理id列表,id可以从代理的list接口获取 |
50+
51+
- **Response**
52+
53+
| 参数 | 必选 | 类型 | 说明 |
54+
|:---:|:---:|:---:|:---:|
55+
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
56+
| data | false | null | 数据 |
57+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
58+
59+
### 代理列表
60+
61+
- **URL**
62+
63+
`/proxies/list`
64+
65+
- **Method**
66+
67+
`GET`
68+
69+
- **URL Params**
70+
71+
None
72+
73+
- **Response**
74+
75+
| 参数 | 必选 | 类型 | 说明 |
76+
|:---:|:---:|:---:|:---:|
77+
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
78+
| data | true | list | [ [代理信息](#代理信息) ] |
79+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
80+
81+
#### 代理信息
82+
83+
| 参数 | 必选 | 类型 | 说明 |
84+
|:---:|:---:|:---:|:---:|
85+
| id | true | int | id(用于管理) |
86+
| url | true | string | 代理地址 |
87+
| ct | true | int | 创建时间戳 |
88+
| ut | true | int | 更新时间戳 |
89+
| enable | true | int | 0: 不启用 1: 启用 |
90+
91+
### 启用代理
92+
93+
- **功能说明**
94+
95+
请求会轮换使用处于启用状态下的代理。
96+
97+
- **URL**
98+
99+
`/proxies/enable`
100+
101+
- **Method**
102+
103+
`POST`
104+
105+
- **Data Params**
106+
107+
| 参数 | 必选 | 类型 | 说明 |
108+
|:---:|:---:|:---:|:---:|
109+
| ids | true | [int] | 需要启用的代理id列表,id可以从代理的list接口获取 |
110+
111+
- **Response**
112+
113+
| 参数 | 必选 | 类型 | 说明 |
114+
|:---:|:---:|:---:|:---:|
115+
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
116+
| data | false | null | 数据 |
117+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
118+
119+
### 禁用代理
120+
121+
- **功能说明**
122+
123+
禁用状态下的代理不会被使用。
124+
125+
- **URL**
126+
127+
`/proxies/disable`
128+
129+
- **Method**
130+
131+
`POST`
132+
133+
- **Data Params**
134+
135+
| 参数 | 必选 | 类型 | 说明 |
136+
|:---:|:---:|:---:|:---:|
137+
| ids | true | [int] | 需要禁用的代理id列表,id可以从代理的list接口获取 |
138+
139+
- **Response**
140+
141+
| 参数 | 必选 | 类型 | 说明 |
142+
|:---:|:---:|:---:|:---:|
143+
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
144+
| data | false | null | 数据 |
145+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

docs/api/weibo/weibo.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,4 +250,4 @@
250250
|:---:|:---:|:---:|:---:|
251251
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
252252
| data | true | struct | 数据 |
253-
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
253+
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

docs/doc.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,15 @@
9999
- 微博用户信息和作品获取
100100

101101
微博:[API 文档](api/weibo/weibo.md)
102+
103+
## 代理
104+
105+
目前支持以下接口:
106+
107+
- 添加代理url
108+
- 移除代理url
109+
- 启用代理
110+
- 禁用代理
111+
- 获取代理url列表
112+
113+
代理:[API 文档](api/proxies/proxies.md)

lib/requests/requests.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import httpx
22
import json
3+
import time
4+
from data.driver import Proxies
5+
6+
# Module-level proxy store constructed with the path data/proxies/proxies.db;
# get_proxy() queries it for the enabled proxies.
proxyModel = Proxies("data/proxies/proxies.db")
7+
# Most recent list of enabled proxies; get_proxy() rebinds it on every call.
proxies = []
38

49
class Response:
510
def __init__(self, status_code, text):
@@ -21,14 +26,24 @@ async def wrapper(*args, **kwargs):
2126

2227
return wrapper
2328

29+
async def get_proxy(interval: int = 300):
    """Return the URL of the proxy to use right now, or ``None``.

    The enabled proxies are reloaded from the store on every call and the
    module-level ``proxies`` list is replaced with the result. The choice
    rotates through that list by wall-clock time: every *interval* seconds
    the next entry is selected, wrapping around at the end.

    Args:
        interval: Length of one rotation window in seconds (default 300).

    Returns:
        The ``url`` field of the selected proxy, or ``None`` when no proxy
        is enabled.
    """
    global proxies
    proxies = await proxyModel.load(enable=1)
    if not proxies:
        return None
    # Index of the current time window; floor division keeps it an int.
    window = int(time.time()) // interval
    return proxies[window % len(proxies)]['url']
36+
2437
@retry_request
async def get(url, headers=None, params=None) -> Response:
    """Send a GET request to *url* through the currently selected proxy.

    The proxy URL comes from ``get_proxy()``; when that returns ``None`` the
    client is created with ``proxy=None``. A fresh ``httpx.AsyncClient`` is
    used for the single request and closed afterwards.

    Returns:
        A ``Response`` built from the reply's status code and text.
    """
    proxy_url = await get_proxy()
    async with httpx.AsyncClient(proxy=proxy_url) as session:
        reply = await session.get(url, headers=headers, params=params)
        result = Response(reply.status_code, reply.text)
    return result
2943

3044
@retry_request
async def post(url, headers=None, data=None, json=None) -> Response:
    """Send a POST request to *url* through the currently selected proxy.

    The proxy URL comes from ``get_proxy()``; when that returns ``None`` the
    client is created with ``proxy=None``. *data* and *json* are forwarded
    unchanged to ``httpx.AsyncClient.post``.

    Returns:
        A ``Response`` built from the reply's status code and text.
    """
    proxy_url = await get_proxy()
    async with httpx.AsyncClient(proxy=proxy_url) as session:
        reply = await session.post(url, headers=headers, json=json, data=data)
        result = Response(reply.status_code, reply.text)
    return result

main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
CONFIG_PATH = ''
1010

1111
app = FastAPI()
12-
services = ['xhs', 'weibo', 'taobao', 'kuaishou', 'jd', 'douyin', 'bilibili']
12+
services = ['xhs', 'weibo', 'taobao', 'kuaishou', 'jd', 'douyin', 'bilibili', 'proxies']
1313

1414
def register_router():
1515
for service in services:

readme.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
- 小红书[无水印视频](docs/api/xhs/xhs.md#获取笔记详情)
3535
- 哔哩哔哩视频[一键下载](docs/api/bilibili/bilibili.md#bilibili视频下载)
3636
- 微博媒体资源[预览接口](docs/api/weibo/weibo.md#微博媒体预览)
37+
- 支持[IP代理池](https://github.com/ShilongLee/Crawler/wiki/%E4%BB%A3%E7%90%86)(轮换算法)。
3738

3839
## 快速开始
3940

service/proxies/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from data.driver import Proxies
2+
3+
# Module-level Proxies store constructed with the path data/proxies/proxies.db.
proxies = Proxies("data/proxies/proxies.db")

service/proxies/urls.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from . import views
2+
from fastapi import APIRouter
3+
4+
router = APIRouter(prefix='/proxies')

# (path, handler, HTTP method) for each proxy-management endpoint, listed in
# registration order.
for _path, _handler, _method in (
    ('/add', views.add, 'POST'),
    ('/list', views.list, 'GET'),
    ('/remove', views.remove, 'POST'),
    ('/disable', views.disable, 'POST'),
    ('/enable', views.enable, 'POST'),
):
    router.add_api_route(_path, _handler, methods=[_method])

0 commit comments

Comments
 (0)