|
1 | 1 | # coding=utf-8 |
| 2 | +import base64 |
| 3 | +import ipaddress |
2 | 4 | import re |
| 5 | +import socket |
3 | 6 | import urllib |
| 7 | +from urllib.parse import urlparse |
4 | 8 |
|
| 9 | +import requests |
5 | 10 | import uuid_utils.compat as uuid |
6 | 11 | from django.db.models import QuerySet |
7 | 12 | from django.http import HttpResponse |
8 | 13 | from django.utils.translation import gettext_lazy as _ |
9 | 14 | from rest_framework import serializers |
10 | 15 |
|
11 | | -from common.exception.app_exception import NotFound404 |
| 16 | +from application.models import Application |
| 17 | +from common.exception.app_exception import NotFound404, AppApiException |
12 | 18 | from knowledge.models import File, FileSourceType |
13 | 19 | from tools.serializers.tool import UploadedFileField |
14 | 20 |
|
@@ -158,3 +164,80 @@ def delete(self): |
158 | 164 | if file is not None: |
159 | 165 | file.delete() |
160 | 166 | return True |
| 167 | + |
| 168 | + |
def get_url_content(url, application_id: str):
    """Download a remote URL on behalf of an application and return its content.

    The application must exist and have file upload enabled. The URL is
    checked with ``validate_url`` before the request is sent, and redirects
    are not followed.

    :param url: the http/https URL to download
    :param application_id: id of the ``Application`` whose upload settings apply
    :return: a dict with ``status_code``, ``Content-Length``, ``Content-Type``
        and ``content`` (decoded text for text/JSON responses, Base64 text for
        anything else); or an ``AppApiException`` instance when the
        application is missing, file upload is disabled, or the size limit
        is exceeded
    :raises ValueError: if the URL fails validation or the response host
        resolves to an internal address
    """
    # NOTE(review): the error paths below *return* an AppApiException rather
    # than raising it. That is kept as-is so existing callers keep working,
    # but it looks unintentional -- confirm and switch to `raise` if so.
    application = Application.objects.filter(id=application_id).first()
    if application is None:
        return AppApiException(500, _('Application does not exist'))
    if not application.file_upload_enable:
        return AppApiException(500, _('File upload is not enabled'))

    # Default limit is 50 MiB; a configured limit is expressed in MiB.
    # NOTE(review): attribute access assumes file_upload_setting is an object
    # exposing `file_limit`; verify it is not stored as a plain dict.
    file_limit = 50 * 1024 * 1024
    if application.file_upload_setting and application.file_upload_setting.file_limit:
        file_limit = application.file_upload_setting.file_limit * 1024 * 1024

    # Raises ValueError for unsupported schemes and internal hosts.
    validate_url(url)

    response = requests.get(
        url,
        timeout=3,
        allow_redirects=False
    )
    final_host = urlparse(response.url).hostname
    if is_private_ip(final_host):
        raise ValueError("Blocked unsafe redirect to internal host")

    # Enforce the size limit. Header values are strings, so the declared
    # length has to be converted before it can be compared with an int.
    declared_length = response.headers.get('Content-Length', 0)
    try:
        declared_size = int(declared_length)
    except (TypeError, ValueError):
        declared_size = 0
    # The header may be missing or inaccurate, so the received body is
    # measured as well.
    if declared_size > file_limit or len(response.content) > file_limit:
        return AppApiException(500, _('File size exceeds limit'))

    # Choose the representation of the body from its content type.
    content_type = response.headers.get('Content-Type', '')
    if 'text' in content_type or 'json' in content_type:
        content = response.text
    else:
        # Binary payloads are Base64-encoded so they can be carried as text.
        content = base64.b64encode(response.content).decode('utf-8')

    return {
        'status_code': response.status_code,
        'Content-Length': declared_length,
        'Content-Type': content_type,
        'content': content,
    }
| 206 | + |
| 207 | + |
def is_private_ip(host: str) -> bool:
    """Return True when ``host`` must not be fetched because it is internal.

    Every address the host resolves to (IPv4 and IPv6) is inspected; the host
    is rejected if any of them is private, loopback, reserved, link-local,
    multicast, or otherwise not globally routable (for example the shared
    address space 100.64.0.0/10). The check fails closed: a host that is
    empty, cannot be resolved, or yields an unparsable address is treated as
    unsafe.

    :param host: a host name or IP address literal
    :return: True if the host is unsafe to contact, False otherwise
    """
    if not host:
        return True
    try:
        addr_infos = socket.getaddrinfo(host, None)
    except (OSError, ValueError, TypeError):
        # Resolution failure (socket.gaierror is an OSError), bad encoding,
        # or a non-string host: refuse rather than guess.
        return True
    if not addr_infos:
        return True

    for addr_info in addr_infos:
        # sockaddr is the fifth element; its first item is the address text.
        # An IPv6 scope id ("fe80::1%eth0") is dropped before parsing.
        raw_address = addr_info[4][0].split('%', 1)[0]
        try:
            ip = ipaddress.ip_address(raw_address)
        except ValueError:
            return True
        # Classify IPv4-mapped IPv6 addresses by the IPv4 address they wrap.
        mapped = getattr(ip, 'ipv4_mapped', None)
        if mapped is not None:
            ip = mapped
        if (
                ip.is_private or
                ip.is_loopback or
                ip.is_reserved or
                ip.is_link_local or
                ip.is_multicast or
                not ip.is_global
        ):
            return True
    return False
| 221 | + |
| 222 | + |
def validate_url(url: str):
    """Check that ``url`` is safe to fetch and return its parsed form.

    :param url: the URL to check
    :return: the result of ``urlparse(url)``
    :raises ValueError: if the URL is empty, uses a scheme other than
        http/https, has no host, or its host is rejected by ``is_private_ip``
    """
    if not url:
        raise ValueError("URL is required")

    result = urlparse(url)

    # Only plain web schemes are accepted.
    if result.scheme != "http" and result.scheme != "https":
        raise ValueError("Only http and https are allowed")

    # A URL without a host cannot be fetched.
    if not result.hostname:
        raise ValueError("Invalid URL")

    # Refuse hosts that is_private_ip classifies as internal.
    if is_private_ip(result.hostname):
        raise ValueError("Access to internal IP addresses is blocked")

    return result
0 commit comments