Skip to content

Commit 8c3caa2

Browse files
committed
feat: refactor URL content retrieval to use a dedicated function with application checks
1 parent 94e60b0 commit 8c3caa2

File tree

2 files changed

+88
-76
lines changed

2 files changed

+88
-76
lines changed

apps/oss/serializers/file.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
# coding=utf-8
2+
import base64
3+
import ipaddress
24
import re
5+
import socket
36
import urllib
7+
from urllib.parse import urlparse
48

9+
import requests
510
import uuid_utils.compat as uuid
611
from django.db.models import QuerySet
712
from django.http import HttpResponse
813
from django.utils.translation import gettext_lazy as _
914
from rest_framework import serializers
1015

11-
from common.exception.app_exception import NotFound404
16+
from application.models import Application
17+
from common.exception.app_exception import NotFound404, AppApiException
1218
from knowledge.models import File, FileSourceType
1319
from tools.serializers.tool import UploadedFileField
1420

@@ -158,3 +164,80 @@ def delete(self):
158164
if file is not None:
159165
file.delete()
160166
return True
167+
168+
169+
def get_url_content(url, application_id: str):
    """Fetch a remote URL on behalf of an application and return its payload.

    The application must exist and have file upload enabled. The URL is
    checked with ``validate_url`` before the request is made, redirects are
    not followed, and the body is read as a stream so that at most
    ``file_limit`` bytes are ever buffered.

    :param url: the http/https URL to fetch (untrusted user input)
    :param application_id: primary key of the ``Application`` whose upload
        settings govern this request
    :return: dict with ``status_code``, ``Content-Length`` (the raw header
        value, or ``0`` when absent), ``Content-Type`` and ``content``.
        ``content`` is decoded text when the content type contains ``text``
        or ``json``; otherwise it is the Base64-encoded body.
    :raises AppApiException: the application does not exist, file upload is
        disabled, or the response is larger than the allowed size
    :raises ValueError: the URL is rejected by ``validate_url`` or resolves
        to an internal address
    """
    application = Application.objects.filter(id=application_id).first()
    # These must be raised, not returned: a returned exception object would
    # be handed to the caller as if it were a successful payload.
    if application is None:
        raise AppApiException(500, _('Application does not exist'))
    if not application.file_upload_enable:
        raise AppApiException(500, _('File upload is not enabled'))

    # Default cap of 50 * 1024 * 1024 bytes; a configured limit is scaled by
    # 1024 * 1024 (presumably it is expressed in MB -- confirm with the model).
    file_limit = 50 * 1024 * 1024
    if application.file_upload_setting and application.file_upload_setting.file_limit:
        file_limit = application.file_upload_setting.file_limit * 1024 * 1024

    validate_url(url)

    # stream=True defers the body download so the size checks below run
    # before anything large is pulled into memory.
    response = requests.get(
        url,
        timeout=3,
        allow_redirects=False,
        stream=True,
    )
    try:
        # Redirects are not followed, so this re-checks the host that was
        # actually requested.
        final_host = urlparse(response.url).hostname
        if is_private_ip(final_host):
            raise ValueError("Blocked unsafe redirect to internal host")

        # Check the declared size. Headers are strings, so convert before
        # comparing; a missing or malformed header counts as 0 here and is
        # caught by the streaming cap below instead.
        raw_length = response.headers.get('Content-Length', 0)
        try:
            declared_length = int(raw_length)
        except (TypeError, ValueError):
            declared_length = 0
        if declared_length > file_limit:
            raise AppApiException(500, _('File size exceeds limit'))

        # Enforce the limit on the bytes actually received, since the
        # Content-Length header can be absent or wrong.
        chunks = []
        received = 0
        for chunk in response.iter_content(chunk_size=64 * 1024):
            received += len(chunk)
            if received > file_limit:
                raise AppApiException(500, _('File size exceeds limit'))
            chunks.append(chunk)
        body = b''.join(chunks)
        encoding = response.encoding
    finally:
        response.close()

    # Return the status code, the content length, the content type and the body.
    content_type = response.headers.get('Content-Type', '')
    # Decide how to encode the body based on the content type.
    if 'text' in content_type or 'json' in content_type:
        try:
            content = body.decode(encoding or 'utf-8', errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            content = body.decode('utf-8', errors='replace')
    else:
        # Binary content is Base64-encoded.
        content = base64.b64encode(body).decode('utf-8')

    return {
        'status_code': response.status_code,
        'Content-Length': raw_length,
        'Content-Type': content_type,
        'content': content,
    }
206+
207+
208+
def is_private_ip(host: str) -> bool:
    """Return True when ``host`` must not be contacted by the server.

    A host is considered unsafe when it is empty, cannot be resolved, or when
    ANY address it resolves to (IPv4 or IPv6) is private, loopback, reserved,
    link-local (which covers the cloud metadata address 169.254.169.254),
    multicast or unspecified. Checking every resolved address prevents a host
    with one public and one internal record from slipping through.

    :param host: host name or IP literal taken from a URL
    :return: True if the host is unsafe or unresolvable, False otherwise
    """
    if not host:
        return True
    try:
        addr_infos = socket.getaddrinfo(host, None)
    except (OSError, UnicodeError, TypeError, ValueError):
        # Fail closed: anything that cannot be resolved is treated as unsafe.
        return True
    if not addr_infos:
        return True

    for addr_info in addr_infos:
        # sockaddr[0] is the textual address; drop any IPv6 zone id ("%eth0").
        address_text = addr_info[4][0].split('%', 1)[0]
        try:
            ip = ipaddress.ip_address(address_text)
        except ValueError:
            return True
        # Judge IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) by the embedded
        # IPv4 address so they cannot be used to disguise an internal target.
        mapped = getattr(ip, 'ipv4_mapped', None)
        if mapped is not None:
            ip = mapped
        if (
                ip.is_private or
                ip.is_loopback or
                ip.is_reserved or
                ip.is_link_local or
                ip.is_multicast or
                ip.is_unspecified
        ):
            return True
    return False
221+
222+
223+
def validate_url(url: str):
    """Check that ``url`` is a safe target for an outbound request.

    :param url: the URL to check
    :return: the ``urlparse`` result for ``url`` when every check passes
    :raises ValueError: the URL is empty, is not http/https, has no host, or
        its host is rejected by ``is_private_ip``
    """
    if not url:
        raise ValueError("URL is required")

    components = urlparse(url)

    # Only http and https are allowed.
    scheme_allowed = components.scheme in ("http", "https")
    if not scheme_allowed:
        raise ValueError("Only http and https are allowed")

    # The host part must not be empty.
    hostname = components.hostname
    if not hostname:
        raise ValueError("Invalid URL")

    # Refuse internal, reserved, loopback and cloud metadata addresses.
    if is_private_ip(hostname):
        raise ValueError("Access to internal IP addresses is blocked")

    return components

apps/oss/views/file.py

Lines changed: 4 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,14 @@
11
# coding=utf-8
2-
import base64
3-
import ipaddress
4-
import socket
5-
from urllib.parse import urlparse
6-
7-
import requests
82
from django.utils.translation import gettext_lazy as _
93
from drf_spectacular.utils import extend_schema
104
from rest_framework.parsers import MultiPartParser
115
from rest_framework.views import APIView
126
from rest_framework.views import Request
13-
147
from common.auth import TokenAuth
158
from common.log.log import log
169
from common.result import result
1710
from knowledge.api.file import FileUploadAPI, FileGetAPI
18-
from oss.serializers.file import FileSerializer
11+
from oss.serializers.file import FileSerializer, get_url_content
1912

2013

2114
class FileRetrievalView(APIView):
@@ -84,71 +77,7 @@ class GetUrlView(APIView):
8477
operation_id=_('Get url'), # type: ignore
8578
tags=[_('Chat')] # type: ignore
8679
)
87-
def get(self, request: Request):
80+
def get(self, request: Request, application_id: str):
8881
url = request.query_params.get('url')
89-
parsed = validate_url(url)
90-
91-
response = requests.get(
92-
url,
93-
timeout=3,
94-
allow_redirects=False
95-
)
96-
final_host = urlparse(response.url).hostname
97-
if is_private_ip(final_host):
98-
raise ValueError("Blocked unsafe redirect to internal host")
99-
100-
# 返回状态码 响应内容大小 响应的contenttype 还有字节流
101-
content_type = response.headers.get('Content-Type', '')
102-
# 根据内容类型决定如何处理
103-
if 'text' in content_type or 'json' in content_type:
104-
content = response.text
105-
else:
106-
# 二进制内容使用Base64编码
107-
content = base64.b64encode(response.content).decode('utf-8')
108-
109-
return result.success({
110-
'status_code': response.status_code,
111-
'Content-Length': response.headers.get('Content-Length', 0),
112-
'Content-Type': content_type,
113-
'content': content,
114-
})
115-
116-
117-
def is_private_ip(host: str) -> bool:
118-
"""检测 IP 是否属于内网、环回、云 metadata 的危险地址"""
119-
try:
120-
ip = ipaddress.ip_address(socket.gethostbyname(host))
121-
return (
122-
ip.is_private or
123-
ip.is_loopback or
124-
ip.is_reserved or
125-
ip.is_link_local or
126-
ip.is_multicast
127-
)
128-
except Exception:
129-
return True
130-
131-
132-
def validate_url(url: str):
133-
"""验证 URL 是否安全"""
134-
if not url:
135-
raise ValueError("URL is required")
136-
137-
parsed = urlparse(url)
138-
139-
# 仅允许 http / https
140-
if parsed.scheme not in ("http", "https"):
141-
raise ValueError("Only http and https are allowed")
142-
143-
host = parsed.hostname
144-
path = parsed.path
145-
146-
# 域名不能为空
147-
if not host:
148-
raise ValueError("Invalid URL")
149-
150-
# 禁止访问内部、保留、环回、云 metadata
151-
if is_private_ip(host):
152-
raise ValueError("Access to internal IP addresses is blocked")
153-
154-
return parsed
82+
result_data = get_url_content(url, application_id)
83+
return result.success(result_data)

0 commit comments

Comments
 (0)