Skip to content

Commit a467e30

Browse files
wynnteoWynn Teo
andauthored
BAEL-9261 (#18548)
Co-authored-by: Wynn Teo <[email protected]>
1 parent e2699a0 commit a467e30

File tree

3 files changed

+93
-0
lines changed

3 files changed

+93
-0
lines changed

jsoup/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,16 @@
1919
<artifactId>jsoup</artifactId>
2020
<version>${jsoup.version}</version>
2121
</dependency>
22+
<dependency>
23+
<groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
24+
<artifactId>owasp-java-html-sanitizer</artifactId>
25+
<version>${owasp.version}</version>
26+
</dependency>
2227
</dependencies>
2328

2429
<properties>
2530
<jsoup.version>1.17.2</jsoup.version>
31+
<owasp.version>20240325.1</owasp.version>
2632
</properties>
2733

2834
</project>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package com.baeldung.jsoup;
2+
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.safety.Safelist;
5+
import org.owasp.html.HtmlPolicyBuilder;
6+
import org.owasp.html.PolicyFactory;
7+
import org.owasp.html.Sanitizers;
8+
9+
public class HTMLSanitizer {
10+
11+
private static final PolicyFactory POLICY = Sanitizers.FORMATTING.and(Sanitizers.LINKS);
12+
private static final PolicyFactory HTML_POLICY = new HtmlPolicyBuilder().allowCommonBlockElements()
13+
.allowCommonInlineFormattingElements()
14+
.toFactory();
15+
16+
private static final PolicyFactory CUSTOM_POLICY = new HtmlPolicyBuilder().allowElements("a", "p", "div", "span", "h1", "h2", "h3")
17+
.allowUrlProtocols("https")
18+
.allowAttributes("href")
19+
.onElements("a")
20+
.requireRelNofollowOnLinks()
21+
.allowAttributes("class")
22+
.globally()
23+
.allowStyling()
24+
.toFactory();
25+
26+
public static String sanitizeUsingBasic(String htmlContent) {
27+
return POLICY.sanitize(htmlContent);
28+
}
29+
30+
public static String sanitizeUsingHTMLPolicy(String html) {
31+
return HTML_POLICY.sanitize(html);
32+
}
33+
34+
public static String sanitizeUsingCustomPolicy(String html) {
35+
return CUSTOM_POLICY.sanitize(html);
36+
}
37+
38+
public static String sanitizeUsingJsoup(String html) {
39+
Safelist safelist = Safelist.basic()
40+
.addTags("h1", "h2", "h3")
41+
.addAttributes("a", "target")
42+
.addProtocols("a", "href", "http", "https");
43+
return Jsoup.clean(html, safelist);
44+
}
45+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package com.baeldung.jsoup;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import org.junit.jupiter.api.Test;
5+
6+
public class HTMLSanitizerUnitTest {
7+
8+
@Test
9+
void givenScriptAndBasicTags_whenSanitizedWithBasicPolicy_thenStripScriptAndKeepFormatting() {
10+
String input = "<script>alert('XSS')</script><b>Hello</b> <a href='https://example.com'>link</a>";
11+
String expectedOutput = "<b>Hello</b> <a href=\"https://example.com\" rel=\"nofollow\">link</a>";
12+
13+
String sanitized = HTMLSanitizer.sanitizeUsingBasic(input);
14+
assertEquals(expectedOutput, sanitized);
15+
}
16+
17+
@Test
18+
void givenStyledHeadingAndUnsafeLink_whenSanitizedWithCustomPolicy_thenAllowOnlySafeContent() {
19+
String input = "<h1 class='title' style='color:red;'>Welcome</h1>"
20+
+ "<a href='https://example.com' onclick='stealCookies()'>Click</a>"
21+
+ "<script>alert('xss');</script>";
22+
String expectedOutput = "<h1 class=\"title\" style=\"color:red\">Welcome</h1><a href=\"https://example.com\" rel=\"nofollow\">Click</a>";
23+
String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input);
24+
assertEquals(expectedOutput, sanitized);
25+
}
26+
27+
@Test
28+
void givenMixedHtml_whenSanitizedWithCustomPolicy_thenApplyCustomRules() {
29+
String input = "<div><span style='color:blue'>Hello</span><script>alert('hack')</script></div>";
30+
String expectedOutput = "<div><span style=\"color:blue\">Hello</span></div>";
31+
String sanitized = HTMLSanitizer.sanitizeUsingCustomPolicy(input);
32+
assertEquals(expectedOutput, sanitized);
33+
}
34+
35+
@Test
36+
void givenJavascriptHrefAndTargetAttribute_whenSanitizedWithJsoup_thenOnlyAllowSafeContent() {
37+
String input = "<h1 onclick='x()'>Title</h1><a href='javascript:alert(1)' target='_blank'>Click</a>";
38+
String expectedOutput = "<h1>Title</h1><a target=\"_blank\" rel=\"nofollow\">Click</a>";
39+
String sanitized = HTMLSanitizer.sanitizeUsingJsoup(input);
40+
assertEquals(expectedOutput, sanitized);
41+
}
42+
}

0 commit comments

Comments
 (0)