Skip to content

Commit eb82b07

Browse files
authored
Add XXE protection for XMLReader (#386)
Add protection for `XMLReader` with inlined protection calls.
1 parent 43abbea commit eb82b07

File tree

12 files changed

+673
-1
lines changed

12 files changed

+673
-1
lines changed

core-codemods/src/main/java/io/codemodder/codemods/DefaultCodemods.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ public static List<Class<? extends CodeChanger>> asList() {
2626
HardenProcessCreationCodemod.class,
2727
HardenXMLDecoderCodemod.class,
2828
HardenXMLInputFactoryCodemod.class,
29+
HardenXMLReaderCodemod.class,
2930
HardenXStreamCodemod.class,
3031
HardenZipEntryPathsCodemod.class,
3132
HQLParameterizationCodemod.class,
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
package io.codemodder.codemods;
2+
3+
import com.contrastsecurity.sarif.Result;
4+
import com.contrastsecurity.sarif.SarifSchema210;
5+
import com.github.javaparser.ast.CompilationUnit;
6+
import com.github.javaparser.ast.NodeList;
7+
import com.github.javaparser.ast.expr.BooleanLiteralExpr;
8+
import com.github.javaparser.ast.expr.Expression;
9+
import com.github.javaparser.ast.expr.MethodCallExpr;
10+
import com.github.javaparser.ast.expr.StringLiteralExpr;
11+
import com.github.javaparser.ast.stmt.BlockStmt;
12+
import com.github.javaparser.ast.stmt.ExpressionStmt;
13+
import com.github.javaparser.ast.stmt.Statement;
14+
import io.codemodder.*;
15+
import io.codemodder.javaparser.ChangesResult;
16+
import io.codemodder.providers.sarif.semgrep.SemgrepRunner;
17+
import io.codemodder.providers.sarif.semgrep.SemgrepScan;
18+
import java.io.IOException;
19+
import java.nio.file.Files;
20+
import java.nio.file.Path;
21+
import java.util.ArrayList;
22+
import java.util.List;
23+
import java.util.Optional;
24+
import javax.inject.Inject;
25+
import org.slf4j.Logger;
26+
import org.slf4j.LoggerFactory;
27+
28+
/**
29+
* Disables external entity resolution in {@link org.xml.sax.XMLReader} use. This codemod takes a
30+
* different approach than similarly-purposed {@link HardenXMLInputFactoryCodemod}. It attempts to
31+
* inline the necessary calls to {@link org.xml.sax.XMLReader#setFeature(String, boolean)} to
32+
* disable external entities. It must be somewhat clever about this in case one is already present,
33+
* only presenting the one that's needed. We could do this inline with JavaParser inspection but it
34+
* is more robust to use Semgrep to determine which settings are needed.
35+
*/
36+
@Codemod(
37+
id = "pixee:java/harden-xmlreader",
38+
importance = Importance.HIGH,
39+
reviewGuidance = ReviewGuidance.MERGE_WITHOUT_REVIEW)
40+
public final class HardenXMLReaderCodemod extends SarifPluginJavaParserChanger<MethodCallExpr> {
41+
42+
private final SemgrepRunner semgrepRunner;
43+
44+
@Inject
45+
public HardenXMLReaderCodemod(@SemgrepScan(ruleId = "harden-xmlreader") final RuleSarif sarif) {
46+
super(sarif, MethodCallExpr.class, RegionNodeMatcher.MATCHES_START);
47+
this.semgrepRunner = SemgrepRunner.createDefault();
48+
}
49+
50+
@Override
51+
public ChangesResult onResultFound(
52+
final CodemodInvocationContext context,
53+
final CompilationUnit cu,
54+
final MethodCallExpr parseCall, // parse is a vulnerable parse() call on an XMLReader
55+
final Result result) {
56+
57+
// job #1 -- check what setFeatures() are called to see which we don't need to add
58+
SettingsNeededToInject required;
59+
60+
try {
61+
required = gatherRequiredSettings(context, parseCall);
62+
} catch (IOException e) {
63+
log.warn("issue running semgrep to figure out needed calls", e);
64+
return ChangesResult.noChanges;
65+
}
66+
67+
Optional<Expression> scope = parseCall.getScope();
68+
if (scope.isEmpty()) {
69+
return ChangesResult.noChanges;
70+
}
71+
72+
// add the required reader settings to prevent external entity resolution
73+
Expression reader = scope.get();
74+
List<Statement> statements = new ArrayList<>();
75+
if (required.externalGeneralEntities) {
76+
MethodCallExpr setFeature = new MethodCallExpr(reader, "setFeature");
77+
setFeature.addArgument(
78+
new StringLiteralExpr("http://xml.org/sax/features/external-general-entities"));
79+
setFeature.addArgument(new BooleanLiteralExpr(false));
80+
statements.add(new ExpressionStmt(setFeature));
81+
}
82+
if (required.externalParameterEntities) {
83+
MethodCallExpr setFeature = new MethodCallExpr(reader, "setFeature");
84+
setFeature.addArgument(
85+
new StringLiteralExpr("http://xml.org/sax/features/external-parameter-entities"));
86+
setFeature.addArgument(new BooleanLiteralExpr(false));
87+
statements.add(new ExpressionStmt(setFeature));
88+
}
89+
90+
Optional<Statement> parseStatementRef = parseCall.findAncestor(Statement.class);
91+
92+
if (parseStatementRef.isEmpty()) {
93+
return ChangesResult.noChanges;
94+
}
95+
Statement parseStatement = parseStatementRef.get();
96+
Optional<BlockStmt> blockStmt = parseStatement.findAncestor(BlockStmt.class);
97+
if (blockStmt.isEmpty()) {
98+
return ChangesResult.noChanges;
99+
}
100+
101+
NodeList<Statement> existingStatements = blockStmt.get().getStatements();
102+
int parseStatementIndex = existingStatements.indexOf(parseStatement);
103+
existingStatements.addAll(parseStatementIndex, statements);
104+
return ChangesResult.changesApplied;
105+
}
106+
107+
private SettingsNeededToInject gatherRequiredSettings(
108+
final CodemodInvocationContext context, final MethodCallExpr parseCall) throws IOException {
109+
Path needsBothRuleFile = createRuleFile("harden-xmlreader-needs-both.yaml");
110+
Path needsGeneralRuleFile = createRuleFile("harden-xmlreader-just-needs-general.yaml");
111+
Path needsParameterRuleFile = createRuleFile("harden-xmlreader-just-needs-parameter.yaml");
112+
113+
try {
114+
return getSettingsNeededToInject(
115+
context, parseCall, needsBothRuleFile, needsGeneralRuleFile, needsParameterRuleFile);
116+
} finally {
117+
needsBothRuleFile.toFile().delete();
118+
needsGeneralRuleFile.toFile().delete();
119+
needsParameterRuleFile.toFile().delete();
120+
}
121+
}
122+
123+
private SettingsNeededToInject getSettingsNeededToInject(
124+
final CodemodInvocationContext context,
125+
final MethodCallExpr parseCall,
126+
final Path needsBothRuleFile,
127+
final Path needsGeneralRuleFile,
128+
final Path needsParameterRuleFile)
129+
throws IOException {
130+
Path codeDir = context.codeDirectory().asPath();
131+
List<String> codeFilePath = List.of(context.path().toString());
132+
133+
// find all .parse() calls that require both external calls -- probably the most common case
134+
SarifSchema210 sarif =
135+
semgrepRunner.run(List.of(needsBothRuleFile), codeDir, codeFilePath, List.of());
136+
137+
needsBothRuleFile.toFile().delete();
138+
139+
if (hasParseCallInResults(sarif, parseCall)) {
140+
return new SettingsNeededToInject(true, true);
141+
}
142+
143+
// find all .parse() calls that require only external-general restrictions
144+
sarif = semgrepRunner.run(List.of(needsGeneralRuleFile), codeDir, codeFilePath, List.of());
145+
146+
needsGeneralRuleFile.toFile().delete();
147+
148+
if (hasParseCallInResults(sarif, parseCall)) {
149+
return new SettingsNeededToInject(true, false);
150+
}
151+
152+
// find all .parse() calls that require one external-parameter restrictions
153+
sarif = semgrepRunner.run(List.of(needsParameterRuleFile), codeDir, codeFilePath, List.of());
154+
155+
needsParameterRuleFile.toFile().delete();
156+
157+
if (hasParseCallInResults(sarif, parseCall)) {
158+
return new SettingsNeededToInject(false, true);
159+
}
160+
161+
// just return both and "fail closed" to make sure we send a fix, even if it's somehow overkill
162+
log.warn(
163+
"We matched the parse call but can't determine which settings are needed. Defaulting to both. Are all the YAML patterns aligned?");
164+
return new SettingsNeededToInject(true, true);
165+
}
166+
167+
private Path createRuleFile(final String ruleFileName) throws IOException {
168+
// copy the classpath entry to disk so semgrep can read it
169+
Path tempFile = Files.createTempFile("xxe", ".yaml");
170+
tempFile.toFile().deleteOnExit();
171+
String classpathPath = "io/codemodder/codemods/" + ruleFileName;
172+
try (var is = getClass().getClassLoader().getResourceAsStream(classpathPath)) {
173+
Files.copy(is, tempFile, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
174+
}
175+
return tempFile;
176+
}
177+
178+
private boolean hasParseCallInResults(
179+
final SarifSchema210 sarif, final MethodCallExpr parseCall) {
180+
return sarif.getRuns().get(0).getResults().stream()
181+
.map(r -> r.getLocations().get(0))
182+
.anyMatch(
183+
l ->
184+
l.getPhysicalLocation().getRegion().getStartLine()
185+
== parseCall.getRange().get().begin.line);
186+
}
187+
188+
/** Models which settings need to be injected. */
189+
record SettingsNeededToInject(
190+
boolean externalGeneralEntities, boolean externalParameterEntities) {}
191+
192+
private static final Logger log = LoggerFactory.getLogger(HardenXMLReaderCodemod.class);
193+
}

core-codemods/src/main/resources/io/codemodder/codemods/HardenXMLInputFactoryCodemod/report.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"summary" : "Introduced protections against XXE attacks",
2+
"summary" : "Introduced protections against XXE attacks in XMLInputFactory",
33
"control" : "https://github.com/pixee/java-security-toolkit/blob/main/src/main/java/io/github/pixee/security/XMLDecoderSecurity.java",
44
"change" : "Hardened the XML processor to prevent external entities from being resolved, which can prevent data exfiltration and arbitrary code execution",
55
"reviewGuidanceIJustification" : "We believe this change is safe and effective. The behavior of hardened `XMLInputFactory` instances will only be different if the XML they process uses external entities, which is exceptionally rare (and, as demonstrated, quite unsafe anyway.)",
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
This change updates [XMLReader](https://docs.oracle.com/en/java/javase/17/docs/api/java.xml/org/xml/sax/XMLReader.html) to prevent resolution of external entities, which can protect you from arbitrary code execution, sensitive data exfiltration, and probably a bunch more evil things attackers are still discovering.
2+
3+
Without this protection, attackers can cause your `XMLReader` parser to retrieve sensitive information with attacks like this:
4+
5+
```xml
6+
<?xml version="1.0" encoding="UTF-8"?>
7+
<!DOCTYPE foo [ <!ENTITY xxe SYSTEM "file:///etc/passwd"> ]>
8+
<book>
9+
<title>&xxe;</title>
10+
</book>
11+
```
12+
13+
Yes, it's pretty insane that this is the default behavior. Our change hardens the reader with the necessary security features to prevent your parser from resolving external entities.
14+
15+
```diff
16+
XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
17+
StringReader sr = new StringReader(xml);
18+
+ reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
19+
+ reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
20+
reader.parse(new InputSource(sr));
21+
```
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"summary" : "Introduced protections against XXE attacks in XMLReader",
3+
"change" : "Hardened the XMLReader to prevent external entities from being resolved, which can prevent data exfiltration and arbitrary code execution",
4+
"reviewGuidanceIJustification" : "We believe this change is safe and effective. The behavior of hardened `XMLReader` instances will only be different if the XML they process uses external entities, which is exceptionally rare (and, as demonstrated, quite unsafe anyway.)",
5+
"references" : ["https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing", "https://github.com/swisskyrepo/PayloadsAllTheThings/blob/master/XXE%20Injection/README.md"]
6+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
rules:
2+
- id: harden-xmlreader-just-needs-general
3+
patterns:
4+
- pattern: $XMLR.parse(...);
5+
- pattern-inside: |
6+
$RT $METHOD ($ARGS) {
7+
...
8+
$XMLR = XMLReaderFactory.createXMLReader(...);
9+
...
10+
}
11+
- pattern-not-inside: >
12+
$RT $METHOD ($ARGS) {
13+
...
14+
$XMLR.setFeature("http://xml.org/sax/features/external-general-entities", false);
15+
...
16+
}
17+
- pattern-inside: >
18+
$RT $METHOD ($ARGS) {
19+
...
20+
$XMLR.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
21+
...
22+
}
23+
- focus-metavariable: $XMLR
24+
message: Semgrep found a match
25+
languages:
26+
- java
27+
severity: WARNING
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
rules:
2+
- id: harden-xmlreader-just-needs-parameter
3+
patterns:
4+
- pattern: $XMLR.parse(...);
5+
- pattern-inside: |
6+
$RT $METHOD ($ARGS) {
7+
...
8+
$XMLR = XMLReaderFactory.createXMLReader(...);
9+
...
10+
}
11+
- pattern-inside: >
12+
$RT $METHOD ($ARGS) {
13+
...
14+
$XMLR.setFeature("http://xml.org/sax/features/external-general-entities", false);
15+
...
16+
}
17+
- pattern-not-inside: >
18+
$RT $METHOD ($ARGS) {
19+
...
20+
$XMLR.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
21+
...
22+
}
23+
- focus-metavariable: $XMLR
24+
message: Semgrep found a match
25+
languages:
26+
- java
27+
severity: WARNING
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
rules:
2+
- id: harden-xmlreader-needs-both
3+
patterns:
4+
- pattern: $XMLR.parse(...);
5+
- pattern-inside: |
6+
$RT $METHOD ($ARGS) {
7+
...
8+
$XMLR = XMLReaderFactory.createXMLReader(...);
9+
...
10+
}
11+
- pattern-not-inside: >
12+
$RT $METHOD ($ARGS) {
13+
...
14+
$XMLR.setFeature("http://xml.org/sax/features/external-general-entities", false);
15+
...
16+
}
17+
- pattern-not-inside: >
18+
$RT $METHOD ($ARGS) {
19+
...
20+
$XMLR.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
21+
...
22+
}
23+
- focus-metavariable: $XMLR
24+
message: Semgrep found a match
25+
languages:
26+
- java
27+
severity: WARNING
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
rules:
2+
- id: harden-xmlreader
3+
patterns:
4+
- pattern: $XMLR.parse(...);
5+
- pattern-inside: |
6+
$RT $METHOD ($ARGS) {
7+
...
8+
$XMLR = XMLReaderFactory.createXMLReader(...);
9+
...
10+
}
11+
- pattern-not-inside: |
12+
$RT $METHOD ($ARGS) {
13+
...
14+
$XMLR.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
15+
...
16+
}
17+
- pattern-not-inside: |
18+
$RT $METHOD ($ARGS) {
19+
...
20+
$XMLR.setFeature("http://xml.org/sax/features/external-general-entities", false);
21+
...
22+
$XMLR.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
23+
...
24+
}
25+
- pattern-not-inside: |
26+
$RT $METHOD ($ARGS) {
27+
...
28+
$XMLR.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
29+
...
30+
$XMLR.setFeature("http://xml.org/sax/features/external-general-entities", false);
31+
...
32+
}
33+
- focus-metavariable: $XMLR
34+
message: "Semgrep found a match"
35+
languages:
36+
- java
37+
severity: WARNING

0 commit comments

Comments
 (0)