Skip to content

Commit 1a7c537

Browse files
[WEB-5791] fix: broken favicon in links (#8396)
* fix: using base url of a redirect url
* chore: internal networks check for the final_url
* fix: none final_url
* fix: exception handling
* fix: exception handling
* chore: remove unused imports
* refactor: moved ip address check logic into separate function
* fix: ValueError logic
1 parent 27a7cdc commit 1a7c537

File tree

1 file changed

+38
-15
lines changed

1 file changed

+38
-15
lines changed

apps/api/plane/bgtasks/work_item_link_task.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Python imports
22
import logging
33

4-
54
# Third party imports
65
from celery import shared_task
76
import requests
@@ -20,6 +19,34 @@
2019
DEFAULT_FAVICON = "PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGNsYXNzPSJsdWNpZGUgbHVjaWRlLWxpbmstaWNvbiBsdWNpZGUtbGluayI+PHBhdGggZD0iTTEwIDEzYTUgNSAwIDAgMCA3LjU0LjU0bDMtM2E1IDUgMCAwIDAtNy4wNy03LjA3bC0xLjcyIDEuNzEiLz48cGF0aCBkPSJNMTQgMTFhNSA1IDAgMCAwLTcuNTQtLjU0bC0zIDNhNSA1IDAgMCAwIDcuMDcgNy4wN2wxLjcxLTEuNzEiLz48L3N2Zz4=" # noqa: E501
2120

2221

22+
def validate_url_ip(url: str) -> None:
    """
    Validate that a URL doesn't point to a private/internal IP address.

    Only hostnames that are themselves IP addresses are checked; ordinary
    domain names are neither resolved nor rejected here. Besides standard
    IPv4/IPv6 literals, legacy numeric IPv4 spellings accepted by the OS
    resolver (e.g. "2130706433", "127.1", "0x7f.0.0.1") are decoded and
    checked too, since they would otherwise slip past as "domain names".

    Args:
        url: The URL to validate

    Raises:
        ValueError: If the URL points to a private/internal IP
    """
    # Imported locally so this function does not depend on the module's
    # import list for a name it did not previously use.
    import socket

    hostname = urlparse(url).hostname

    if not hostname:
        return

    try:
        ip = ipaddress.ip_address(hostname)
    except ValueError:
        # Not a standard IP literal. It may still be a legacy inet_aton-style
        # IPv4 spelling that the resolver would map to an address.
        try:
            ip = ipaddress.ip_address(socket.inet_aton(hostname))
        except (OSError, ValueError, UnicodeError):
            # Genuinely a domain name, nothing to check here
            return

    # It IS an IP address - reject anything that is not publicly routable.
    # `not is_global` additionally covers the shared address space
    # (100.64.0.0/10), which `is_private` alone does not.
    # The raise stays outside the try blocks above so it is never swallowed
    # by their `except ValueError` handlers.
    if (
        ip.is_private
        or ip.is_loopback
        or ip.is_reserved
        or ip.is_link_local
        or ip.is_multicast
        or ip.is_unspecified
        or not ip.is_global
    ):
        raise ValueError("Access to private/internal networks is not allowed")
48+
49+
2350
def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
2451
"""
2552
Crawls a URL to extract the title and favicon.
@@ -31,27 +58,23 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
3158
str: JSON string containing title and base64-encoded favicon
3259
"""
3360
try:
34-
# Prevent access to private IP ranges
35-
parsed = urlparse(url)
36-
37-
try:
38-
ip = ipaddress.ip_address(parsed.hostname)
39-
if ip.is_private or ip.is_loopback or ip.is_reserved:
40-
raise ValueError("Access to private/internal networks is not allowed")
41-
except ValueError:
42-
# Not an IP address, continue with domain validation
43-
pass
44-
4561
# Set up headers to mimic a real browser
4662
headers = {
4763
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa: E501
4864
}
4965

5066
soup = None
5167
title = None
68+
final_url = url
69+
70+
validate_url_ip(final_url)
5271

5372
try:
54-
response = requests.get(url, headers=headers, timeout=1)
73+
response = requests.get(final_url, headers=headers, timeout=1)
74+
final_url = response.url # Get the final URL after any redirects
75+
76+
# check for redirected url also
77+
validate_url_ip(final_url)
5578

5679
soup = BeautifulSoup(response.content, "html.parser")
5780
title_tag = soup.find("title")
@@ -60,8 +83,8 @@ def crawl_work_item_link_title_and_favicon(url: str) -> Dict[str, Any]:
6083
except requests.RequestException as e:
6184
logger.warning(f"Failed to fetch HTML for title: {str(e)}")
6285

63-
# Fetch and encode favicon
64-
favicon_base64 = fetch_and_encode_favicon(headers, soup, url)
86+
# Fetch and encode favicon using final URL (after redirects)
87+
favicon_base64 = fetch_and_encode_favicon(headers, soup, final_url)
6588

6689
# Prepare result
6790
result = {

0 commit comments

Comments
 (0)