Skip to content

Commit cf7a586

Browse files
committed
add url validator
1 parent d4c1f46 commit cf7a586

File tree

2 files changed

+182
-0
lines changed

2 files changed

+182
-0
lines changed

dataplane/src/main/java/io/sentrius/sso/core/services/documents/retrieval/HttpDocumentRetrievalService.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ public DocumentRetrievalResult retrieveDocumentWithMetadata(String sourceUrl, Ma
4646

4747
log.info("Retrieving document from HTTP(S) source: {}", sourceUrl);
4848

49+
// Validate URL to prevent SSRF attacks
50+
UrlValidator.validateUrl(sourceUrl);
51+
4952
try {
5053
// Build headers from options
5154
HttpHeaders headers = new HttpHeaders();
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package io.sentrius.sso.core.services.documents.retrieval;
2+
3+
import lombok.extern.slf4j.Slf4j;

import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.Locale;
import java.util.regex.Pattern;
8+
9+
/**
10+
* Validates URLs to prevent Server-Side Request Forgery (SSRF) attacks.
11+
* Blocks access to private networks, localhost, and non-HTTP(S) protocols.
12+
*/
13+
@Slf4j
14+
public class UrlValidator {
15+
16+
/**
17+
* Validates a URL to prevent SSRF attacks
18+
*
19+
* @param url The URL to validate
20+
* @throws DocumentRetrievalException if URL is invalid or potentially dangerous
21+
*/
22+
public static void validateUrl(String url) throws DocumentRetrievalException {
23+
if (url == null || url.trim().isEmpty()) {
24+
throw new DocumentRetrievalException("URL cannot be null or empty");
25+
}
26+
27+
URI uri;
28+
try {
29+
uri = URI.create(url);
30+
} catch (IllegalArgumentException e) {
31+
throw new DocumentRetrievalException("Invalid URL format: " + e.getMessage());
32+
}
33+
34+
// Validate scheme - only allow http and https
35+
String scheme = uri.getScheme();
36+
if (scheme == null || (!scheme.equalsIgnoreCase("http") && !scheme.equalsIgnoreCase("https"))) {
37+
throw new DocumentRetrievalException(
38+
"Invalid URL scheme. Only HTTP and HTTPS protocols are allowed. Found: " + scheme);
39+
}
40+
41+
// Get the host from the URI
42+
String host = uri.getHost();
43+
if (host == null || host.trim().isEmpty()) {
44+
throw new DocumentRetrievalException("URL must contain a valid host");
45+
}
46+
47+
// Normalize host to lowercase for comparison
48+
host = host.toLowerCase();
49+
50+
// Block localhost and localhost-like hostnames
51+
if (isLocalhost(host)) {
52+
throw new DocumentRetrievalException(
53+
"Access to localhost is not allowed for security reasons");
54+
}
55+
56+
// Check if host is an IP address literal
57+
if (isIpAddressLiteral(host)) {
58+
// If it's an IP address, validate it directly
59+
try {
60+
InetAddress address = InetAddress.getByName(host);
61+
if (isPrivateOrReservedAddress(address)) {
62+
throw new DocumentRetrievalException(
63+
"Access to private or reserved IP addresses is not allowed for security reasons: " +
64+
address.getHostAddress());
65+
}
66+
} catch (UnknownHostException e) {
67+
throw new DocumentRetrievalException("Invalid IP address: " + host);
68+
}
69+
} else {
70+
// For domain names, try to resolve but don't fail if DNS is unavailable
71+
// This allows the service to attempt the connection, where the actual HTTP client
72+
// will handle DNS resolution failures appropriately
73+
try {
74+
InetAddress address = InetAddress.getByName(host);
75+
if (isPrivateOrReservedAddress(address)) {
76+
throw new DocumentRetrievalException(
77+
"Access to private or reserved IP addresses is not allowed for security reasons: " +
78+
address.getHostAddress());
79+
}
80+
} catch (UnknownHostException e) {
81+
// For domain names that don't resolve (e.g., in test environments),
82+
// we allow the request to proceed and let the HTTP client handle it
83+
log.debug("Could not pre-resolve host {}, will allow HTTP client to handle: {}",
84+
host, e.getMessage());
85+
}
86+
}
87+
88+
log.debug("URL validation passed for: {}", url);
89+
}
90+
91+
/**
92+
* Checks if the string is an IP address literal (IPv4 or IPv6)
93+
*/
94+
private static boolean isIpAddressLiteral(String host) {
95+
// Check for IPv4 pattern with valid octet ranges (0-255)
96+
if (host.matches("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")) {
97+
return true;
98+
}
99+
// Check for IPv6 (contains colons)
100+
if (host.contains(":")) {
101+
return true;
102+
}
103+
return false;
104+
}
105+
106+
/**
107+
* Checks if the hostname is localhost or a localhost variant
108+
*/
109+
private static boolean isLocalhost(String host) {
110+
return host.equals("localhost") ||
111+
host.equals("127.0.0.1") ||
112+
host.equals("::1") ||
113+
host.equals("0.0.0.0") ||
114+
host.startsWith("localhost.") ||
115+
host.endsWith(".localhost");
116+
}
117+
118+
/**
119+
* Checks if an IP address is private, loopback, link-local, or reserved
120+
*/
121+
private static boolean isPrivateOrReservedAddress(InetAddress address) {
122+
// Check for loopback addresses (127.0.0.0/8, ::1)
123+
if (address.isLoopbackAddress()) {
124+
return true;
125+
}
126+
127+
// Check for link-local addresses (169.254.0.0/16, fe80::/10)
128+
if (address.isLinkLocalAddress()) {
129+
return true;
130+
}
131+
132+
// Check for site-local addresses (deprecated, but still blocked)
133+
// This covers 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16, fec0::/10
134+
if (address.isSiteLocalAddress()) {
135+
return true;
136+
}
137+
138+
// Check for multicast addresses
139+
if (address.isMulticastAddress()) {
140+
return true;
141+
}
142+
143+
// Check for any local address (0.0.0.0, ::)
144+
if (address.isAnyLocalAddress()) {
145+
return true;
146+
}
147+
148+
// Additional check for IPv4 private ranges that might not be caught
149+
byte[] bytes = address.getAddress();
150+
if (bytes.length == 4) {
151+
// Check 10.0.0.0/8
152+
if (bytes[0] == 10) {
153+
return true;
154+
}
155+
// Check 172.16.0.0/12
156+
if (bytes[0] == (byte) 172 && (bytes[1] & 0xF0) == 0x10) {
157+
return true;
158+
}
159+
// Check 192.168.0.0/16
160+
if (bytes[0] == (byte) 192 && bytes[1] == (byte) 168) {
161+
return true;
162+
}
163+
// Check 169.254.0.0/16 (AWS/Azure metadata service)
164+
if (bytes[0] == (byte) 169 && bytes[1] == (byte) 254) {
165+
return true;
166+
}
167+
// Check 127.0.0.0/8 (loopback)
168+
if (bytes[0] == 127) {
169+
return true;
170+
}
171+
// Check 0.0.0.0/8
172+
if (bytes[0] == 0) {
173+
return true;
174+
}
175+
}
176+
177+
return false;
178+
}
179+
}

0 commit comments

Comments
 (0)