Skip to content

Commit 3ac476a

Browse files
authored
Merge pull request #3944 from mattjoke/html-filter-fix
feat(core): added tags and attributes to HTML Parser
2 parents a8af2d4 + 12e1de9 commit 3ac476a

File tree

3 files changed

+84
-18
lines changed

3 files changed

+84
-18
lines changed

perun-base/src/main/java/cz/metacentrum/perun/core/api/CoreConfig.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public void initBeansUtils() {
101101
private int idpLoginValidity;
102102
private List<String> idpLoginValidityExceptions;
103103
private int roleUpdateInterval;
104+
private boolean forceHTMLSanitization;
104105

105106
public int getGroupMaxConcurentGroupsToSynchronize() {
106107
return groupMaxConcurentGroupsToSynchronize;
@@ -800,6 +801,14 @@ public void setIntrospectionEndpointMfaAcrValue(String introspectionEndpointMfaA
800801
this.introspectionEndpointMfaAcrValue = introspectionEndpointMfaAcrValue;
801802
}
802803

804+
public boolean getForceHTMLSanitization() {
805+
return forceHTMLSanitization;
806+
}
807+
808+
public void setForceHTMLSanitization(boolean forceHTMLSanitization) {
809+
this.forceHTMLSanitization = forceHTMLSanitization;
810+
}
811+
803812
public int getMfaAuthTimeout() {
804813
return mfaAuthTimeout;
805814
}

perun-base/src/main/resources/perun-base.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
<property name="introspectionEndpointMfaAcrValue" value="${perun.introspectionEndpoint.mfaAcrValue}"/>
8585
<property name="idpLoginValidity" value="${perun.idpLoginValidity}"/>
8686
<property name="idpLoginValidityExceptions" value="#{'${perun.idpLoginValidityExceptions}'.split('\s*,\s*')}"/>
87+
<property name="forceHTMLSanitization" value="${perun.forceHtmlSanitization}"/>
8788
</bean>
8889

8990

@@ -187,6 +188,7 @@
187188
<prop key="perun.introspectionEndpoint.mfaAuthTimeoutPercentageForceLogIn">75</prop>
188189
<prop key="perun.introspectionEndpoint.mfaAcrValue">https://refeds.org/profile/mfa</prop>
189190
<prop key="perun.enforceMfa">false</prop>
191+
<prop key="perun.forceHtmlSanitization">false</prop>
190192
</props>
191193
</property>
192194
</bean>

perun-core/src/main/java/cz/metacentrum/perun/core/impl/HTMLParser.java

Lines changed: 73 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package cz.metacentrum.perun.core.impl;
22

3+
import cz.metacentrum.perun.core.api.BeansUtils;
34
import org.owasp.html.CssSchema;
45
import org.owasp.html.HtmlChangeListener;
56
import org.owasp.html.HtmlPolicyBuilder;
@@ -18,19 +19,20 @@
1819
* @author: Matej Hakoš <[email protected]>
1920
*/
2021
public class HTMLParser {
21-
private static final String[] allowedTags = {"a", "article", "aside", "b", "blockquote", "br", "button", "caption", "center", "cite", "decorator", "del", "details", "div", "em", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "i", "img", "kbd", "label", "li", "ol", "p", "pre", "section", "select", "span", "strong", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "ul"};
22-
private static final String[] allowedAttributes = {"align", "class", "color", "disabled", "height", "hidden", "href", "id", "label", "size", "span", "src", "srcset", "style", "width"};
23-
private static final String[] allowedStyles = {"color", "background-color", "font-size", "font-family", "text-align", "margin", "padding", "border", "width", "height", "display", "position", "top", "bottom", "left", "right", "overflow", "float", "clear", "z-index"};
22+
private static final String[] allowedTags = {"a", "article", "aside", "b", "blockquote", "br", "button", "caption", "center", "cite", "decorator", "del", "details", "div", "em", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "i", "img", "kbd", "label", "li", "ol", "p", "pre", "section", "select", "span", "strong", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "tr", "u", "ul"};
23+
private static final String[] allowedAttributes = {"align", "class", "color", "data-lang", "disabled", "height", "hidden", "href", "id", "label", "rel", "size", "span", "src", "srcset", "style", "type", "width"};
24+
private static final String[] allowedStyles = {"color", "background-color", "font-size", "font-family", "text-align", "margin", "padding", "border", "width", "max-height", "height", "display", "position", "top", "bottom", "left", "right", "overflow", "float", "clear", "z-index"};
2425
private static final String[] allowedUrlProtocols = {"https", "mailto"};
2526
private static PolicyFactory policy = null;
2627

2728
private List<String> escapedTags = new ArrayList<>();
2829
private List<String> escapedAttributes = new ArrayList<>();
2930
private List<String> escapedStyles = new ArrayList<>();
31+
private List<String> escapedLinks = new ArrayList<>();
3032

3133
private String rawHTML = "";
3234
private String escapedHTML = "";
33-
private String[] escapedStrings = new String[]{"", "", ""};
35+
private String[] escapedStrings = new String[]{"", "", "", ""};
3436
private boolean isInputValid = true;
3537

3638
public HTMLParser() {
@@ -54,6 +56,10 @@ private static PolicyFactory generatePolicy() {
5456
}
5557
CssSchema cssWhitelist = CssSchema.withProperties(Arrays.stream(allowedStyles).toList());
5658
p.allowStyling(cssWhitelist);
59+
60+
// allow 'href' and 'target' attribute on 'a' tags
61+
p.allowAttributes("href").onElements("a");
62+
p.allowAttributes("target").onElements("a");
5763
return p.toFactory();
5864
}
5965

@@ -64,11 +70,11 @@ private static PolicyFactory generatePolicy() {
6470
* @param escaped - escaped input
6571
*/
6672
private void computeEscapedStyles(String input, String escaped) {
67-
Pattern pattern = Pattern.compile("style=\"(.*?)\"");
73+
Pattern pattern = Pattern.compile("style=(\"|')(.*?)(\"|')");
6874
if (input == null || escaped == null) return;
6975
Matcher matcher = pattern.matcher(input);
7076
while (matcher.find()) {
71-
String style = matcher.group(1);
77+
String style = matcher.group(2);
7278
String[] styles = style.split(";");
7379
for (String s : styles) {
7480
String[] split = s.split(":");
@@ -79,7 +85,7 @@ private void computeEscapedStyles(String input, String escaped) {
7985
}
8086
matcher = pattern.matcher(escaped);
8187
while (matcher.find()) {
82-
String style = matcher.group(1);
88+
String style = matcher.group(2);
8389
String[] styles = style.split(";");
8490
for (String s : styles) {
8591
String[] split = s.split(":");
@@ -90,6 +96,27 @@ private void computeEscapedStyles(String input, String escaped) {
9096
}
9197
}
9298

99+
/**
100+
* Computes the difference between all links in the escaped and unescaped input.
101+
*
102+
* @param input - unescaped input
103+
* @param escaped - escaped input
104+
*/
105+
public void computeInvalidLink(String input, String escaped) {
106+
Pattern pattern = Pattern.compile("href=(\"|')(.*?)(\"|')");
107+
if (input == null || escaped == null) return;
108+
Matcher matcher = pattern.matcher(input);
109+
while (matcher.find()) {
110+
// Multiple groups, url is in group 2
111+
String link = matcher.group(2).trim();
112+
escapedLinks.add(link);
113+
}
114+
115+
for(String protocol : allowedUrlProtocols) {
116+
escapedLinks.removeIf(link -> link.startsWith(protocol));
117+
}
118+
}
119+
93120
/**
94121
* Clears the list of escaped tags and attributes.
95122
* Recomputes the policy and resets the escapedHTML/unescapedHTML and escapedStrings.
@@ -99,9 +126,10 @@ public HTMLParser clear() {
99126
escapedTags.clear();
100127
escapedAttributes.clear();
101128
escapedStyles.clear();
129+
escapedLinks.clear();
102130
escapedHTML = "";
103131
rawHTML = "";
104-
escapedStrings = new String[]{"", "", ""};
132+
escapedStrings = new String[]{"", "", "", ""};
105133
isInputValid = true;
106134
policy = generatePolicy();
107135
return this;
@@ -112,8 +140,10 @@ public HTMLParser clear() {
112140
* @return escapedHTML - sanitized HTML input
113141
*/
114142
public String getEscapedHTML() {
115-
return rawHTML;
116-
// return escapedHTML;
143+
if (!BeansUtils.getCoreConfig().getForceHTMLSanitization()) {
144+
return rawHTML;
145+
}
146+
return escapedHTML;
117147
}
118148

119149
/**
@@ -129,8 +159,10 @@ public String getRawInput() {
129159
* Returns validity of the last input.
130160
*/
131161
public boolean isInputValid() {
132-
return true;
133-
// return isInputValid;
162+
if(!BeansUtils.getCoreConfig().getForceHTMLSanitization()){
163+
return true;
164+
}
165+
return isInputValid;
134166
}
135167

136168
/**
@@ -150,15 +182,26 @@ public String[] getEscaped() {
150182
*/
151183
public HTMLParser sanitizeHTML(String input) {
152184
rawHTML = input;
185+
if (!BeansUtils.getCoreConfig().getForceHTMLSanitization()) {
186+
return this;
187+
}
153188
HtmlChangeListener<List<List<String>>> listener = new HtmlChangeListener<>() {
154189
@Override
155190
public void discardedTag(@Nullable List<List<String>> output, String tag) {
191+
if (tag.equals("a")) {
192+
return;
193+
}
156194
output.get(0).add(tag);
157195
}
158196

159197
@Override
160198
public void discardedAttributes(@Nullable List<List<String>> output, String tag, String... attributes) {
161-
output.get(1).add(Arrays.toString(attributes) + " in " + tag);
199+
// Remove 'href', because it gets computed in computeInvalidLink
200+
List<String> attrs = Arrays.stream(attributes).filter(attr -> !attr.equals("href")).toList();
201+
if (attrs.isEmpty()) {
202+
return;
203+
}
204+
output.get(1).add(tag + " " + attrs);
162205
}
163206
};
164207

@@ -167,12 +210,20 @@ public void discardedAttributes(@Nullable List<List<String>> output, String tag,
167210
output.add(new ArrayList<>());
168211
output.add(new ArrayList<>());
169212
String escapedOutput = policy.sanitize(input, listener, output);
213+
170214
escapedTags = output.get(0);
171215
escapedAttributes = output.get(1);
172216
computeEscapedStyles(input, escapedOutput);
217+
computeInvalidLink(input, escapedOutput);
218+
219+
// Remove whitespaces and filter empty strings
220+
escapedTags = escapedTags.stream().map(String::trim).filter(s -> !s.isEmpty()).toList();
221+
escapedAttributes = escapedAttributes.stream().map(String::trim).filter(s -> !s.isEmpty()).toList();
222+
escapedStyles = escapedStyles.stream().map(String::trim).filter(s -> !s.isEmpty()).toList();
223+
escapedLinks = escapedLinks.stream().map(String::trim).filter(s -> !s.isEmpty()).toList();
173224

174225
escapedHTML = escapedOutput;
175-
isInputValid = escapedTags.isEmpty() && escapedAttributes.isEmpty() && escapedStyles.isEmpty();
226+
isInputValid = escapedTags.isEmpty() && escapedAttributes.isEmpty() && escapedStyles.isEmpty() && escapedLinks.isEmpty();
176227
return this;
177228
}
178229

@@ -185,13 +236,14 @@ public void discardedAttributes(@Nullable List<List<String>> output, String tag,
185236
*/
186237
public HTMLParser checkEscapedHTML(String escaped, String unescaped) {
187238
if (escaped.equals(unescaped) || isInputValid){
188-
escapedStrings = new String[]{"", "", ""};
239+
escapedStrings = new String[]{"", "", "", ""};
189240
return this;
190241
}
191242

192243
escapedStrings[0] = String.join(", ", escapedTags);
193244
escapedStrings[1] = String.join(", ", escapedAttributes);
194245
escapedStrings[2] = String.join(", ", escapedStyles);
246+
escapedStrings[3] = String.join(", ", escapedLinks);
195247
return this;
196248
}
197249

@@ -208,14 +260,17 @@ public HTMLParser checkEscapedHTML() {
208260
public static String getMessage(String[] escaped) {
209261
StringBuilder message = new StringBuilder();
210262
if (escaped[0].length() > 0) {
211-
message.append("The following tags are not allowed: ").append(escaped[0]).append(". ");
263+
message.append("Following tags are invalid: ").append(escaped[0]).append(". ");
212264
}
213265
if (escaped[1].length() > 0) {
214-
message.append("The following attributes are not allowed: ").append(escaped[1]).append(". ");
266+
message.append("Following attributes are invalid: ").append(escaped[1]).append(". ");
215267
}
216268
if (escaped[2].length() > 0) {
217-
message.append("The following styles are not allowed: ").append(escaped[2]).append(". ");
269+
message.append("Following styles are invalid: ").append(escaped[2]).append(". ");
218270
}
271+
if (escaped[3].length() > 0) {
272+
message.append("'").append(escaped[3]).append("' links contain invalid protocol (allowed: https, mailto).");
273+
}
219274
return message.toString();
220275
}
221276

0 commit comments

Comments
 (0)