Skip to content

Commit 2250c05

Browse files
authored
Merge pull request #33 from simplesamlphp/feature/xpath-assert
Feature: Add assertion to test XPath filters against an allow-list for axes and functions
2 parents 6df7b6c + eae1f25 commit 2250c05

File tree

6 files changed

+680
-12
lines changed

6 files changed

+680
-12
lines changed

src/Assert/Assert.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,23 @@
99
/**
1010
* @package simplesamlphp/xml-common
1111
*
12+
* @method static void validAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
1213
* @method static void validHexBinary(mixed $value, string $message = '', string $exception = '')
1314
* @method static void validNMToken(mixed $value, string $message = '', string $exception = '')
1415
* @method static void validNMTokens(mixed $value, string $message = '', string $exception = '')
1516
* @method static void validDuration(mixed $value, string $message = '', string $exception = '')
1617
* @method static void validDateTime(mixed $value, string $message = '', string $exception = '')
1718
* @method static void validNCName(mixed $value, string $message = '', string $exception = '')
1819
* @method static void validQName(mixed $value, string $message = '', string $exception = '')
20+
* @method static void nullOrValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
1921
* @method static void nullOrValidHexBinary(mixed $value, string $message = '', string $exception = '')
2022
* @method static void nullOrValidNMToken(mixed $value, string $message = '', string $exception = '')
2123
* @method static void nullOrValidNMTokens(mixed $value, string $message = '', string $exception = '')
2224
* @method static void nullOrValidDuration(mixed $value, string $message = '', string $exception = '')
2325
* @method static void nullOrValidDateTime(mixed $value, string $message = '', string $exception = '')
2426
* @method static void nullOrValidNCName(mixed $value, string $message = '', string $exception = '')
2527
* @method static void nullOrValidQName(mixed $value, string $message = '', string $exception = '')
28+
* @method static void allValidAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
2629
* @method static void allValidHexBinary(mixed $value, string $message = '', string $exception = '')
2730
* @method static void allValidNMToken(mixed $value, string $message = '', string $exception = '')
2831
* @method static void allValidNMTokens(mixed $value, string $message = '', string $exception = '')
@@ -38,4 +41,5 @@ class Assert extends BaseAssert
3841
use HexBinTrait;
3942
use NamesTrait;
4043
use TokensTrait;
44+
use XPathFilterTrait;
4145
}

src/Assert/XPathFilterTrait.php

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Assert;
6+
7+
use InvalidArgumentException;
8+
use SimpleSAML\Assert\Assert as BaseAssert;
9+
use SimpleSAML\XML\Constants as C;
10+
use SimpleSAML\XML\Exception\RuntimeException;
11+
use SimpleSAML\XML\Utils\XPathFilter;
12+
13+
use function sprintf;
14+
15+
/**
16+
* @package simplesamlphp/xml-common
17+
*/
18+
trait XPathFilterTrait
19+
{
20+
/***********************************************************************************
21+
* NOTE: Custom assertions may be added below this line. *
22+
* They SHOULD be marked as `private` to ensure the call is forced *
23+
* through __callStatic(). *
24+
* Assertions marked `public` are called directly and will *
25+
* not handle any custom exception passed to it. *
26+
***********************************************************************************/
27+
28+
/**
29+
* Check an XPath expression for allowed axes and functions
30+
* The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
31+
* a select subset of functions and axes.
32+
* The check uses a list of allowed functions and axes, and throws an exception when an unknown function
33+
* or axis is found in the $xpathExpression.
34+
*
35+
* Limitations:
36+
* - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
37+
* evaluate all possible valid XPath expressions correctly and cause either false positives for valid
38+
* expressions or false negatives for invalid expressions.
39+
* - The check may still allow expressions that are not safe, I.e. expressions that consist of only
40+
* functions and axes that are deemed "save", but that are still slow to evaluate. The time it takes to
41+
* evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document.
42+
* This check, however, does not take the XML document into account, nor is it aware of the internals of the
43+
* XPath processor that will evaluate the expression.
44+
* - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
45+
*
46+
* @param string $xpathExpression
47+
* @param array<string> $allowedAxes
48+
* @param array<string> $allowedFunctions
49+
* @param string $message
50+
*/
51+
public static function validAllowedXPathFilter(
52+
string $xpathExpression,
53+
array $allowedAxes = C::DEFAULT_ALLOWED_AXES,
54+
array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS,
55+
string $message = '',
56+
): void {
57+
BaseAssert::allString($allowedAxes);
58+
BaseAssert::allString($allowedFunctions);
59+
BaseAssert::maxLength(
60+
$xpathExpression,
61+
C::XPATH_FILTER_MAX_LENGTH,
62+
sprintf('XPath Filter exceeds the limit of 100 characters.'),
63+
);
64+
65+
try {
66+
// First remove the contents of any string literals in the $xpath to prevent false positives
67+
$xpathWithoutStringLiterals = XPathFilter::removeStringContents($xpathExpression);
68+
69+
// Then check that the xpath expression only contains allowed functions and axes, throws when it doesn't
70+
XPathFilter::filterXPathFunction($xpathWithoutStringLiterals, $allowedFunctions);
71+
XPathFilter::filterXPathAxis($xpathWithoutStringLiterals, $allowedAxes);
72+
} catch (RuntimeException $e) {
73+
throw new InvalidArgumentException(sprintf(
74+
$message ?: $e->getMessage(),
75+
$xpathExpression,
76+
));
77+
}
78+
}
79+
}

src/Constants.php

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,55 @@ class Constants
3737
*/
3838
public const XPATH10_URI = 'http://www.w3.org/TR/1999/REC-xpath-19991116';
3939

40-
/**
41-
* The namespace for the XML Path Language 2.0
42-
*/
43-
public const XPATH20_URI = 'http://www.w3.org/TR/2010/REC-xpath20-20101214/';
40+
/** @var array<string> */
41+
public const DEFAULT_ALLOWED_AXES = [
42+
'ancestor',
43+
'ancestor-or-self',
44+
'attribute',
45+
'child',
46+
'descendant',
47+
'descendant-or-self',
48+
'following',
49+
'following-sibling',
50+
// 'namespace', // By default, we do not allow using the namespace axis
51+
'parent',
52+
'preceding',
53+
'preceding-sibling',
54+
'self',
55+
];
4456

45-
/**
46-
* The namespace for the XML Path Language 3.0
47-
*/
48-
public const XPATH30_URI = 'https://www.w3.org/TR/2014/REC-xpath-30-20140408/';
57+
/** @var array<string> */
58+
public const DEFAULT_ALLOWED_FUNCTIONS = [
59+
// 'boolean',
60+
// 'ceiling',
61+
// 'concat',
62+
// 'contains',
63+
// 'count',
64+
// 'false',
65+
// 'floor',
66+
// 'id',
67+
// 'lang',
68+
// 'last',
69+
// 'local-name',
70+
// 'name',
71+
// 'namespace-uri',
72+
// 'normalize-space',
73+
'not',
74+
// 'number',
75+
// 'position',
76+
// 'round',
77+
// 'starts-with',
78+
// 'string',
79+
// 'string-length',
80+
// 'substring',
81+
// 'substring-after',
82+
// 'substring-before',
83+
// 'sum',
84+
// 'text',
85+
// 'translate',
86+
// 'true',
87+
];
4988

50-
/**
51-
* The namespace for the XML Path Language 3.1
52-
*/
53-
public const XPATH31_URI = 'https://www.w3.org/TR/2017/REC-xpath-31-20170321/';
89+
/** @var int */
90+
public const XPATH_FILTER_MAX_LENGTH = 100;
5491
}

src/Utils/XPathFilter.php

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Utils;
6+
7+
use SimpleSAML\XML\Exception\RuntimeException;
8+
9+
use function in_array;
10+
use function preg_match_all;
11+
use function preg_replace;
12+
13+
/**
14+
* XPathFilter helper functions for the XML library.
15+
*
16+
* @package simplesamlphp/xml-common
17+
*/
18+
class XPathFilter
19+
{
20+
/**
21+
* Remove the content from all single or double-quoted strings in $input, leaving only quotes.
22+
*
23+
* @param string $input
24+
* @return string
25+
* @throws \SimpleSAML\XML\Exception\RuntimeException
26+
*/
27+
public static function removeStringContents(string $input): string
28+
{
29+
/**
30+
* This regex should not be vulnerable to a ReDOS, because it uses possessive quantifiers
31+
* that prevent backtracking.
32+
*
33+
* @see https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS
34+
*
35+
* Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
36+
*
37+
* '/(["\'])(?:(?!\1).)*+\1/'
38+
* (["\']) # Match a single or double quote and capture it in group 1
39+
* (?: # Start a non-capturing group
40+
* (?! # Negative lookahead
41+
* \1 # Match the same quote as in group 1
42+
* ) # End of negative lookahead
43+
* . # Match any character (that is not a quote, because of the negative lookahead)
44+
* )*+ # Repeat the non-capturing group zero or more times, possessively
45+
* \1 # Match the same quote as in group 1
46+
*/
47+
$res = preg_replace(
48+
'/(["\'])(?:(?!\\1).)*+\\1/',
49+
"\\1\\1", // Replace the content with two of the quotes that were matched
50+
$input,
51+
);
52+
53+
if (null === $res) {
54+
throw new RuntimeException("Error in preg_replace");
55+
}
56+
57+
return $res;
58+
}
59+
60+
61+
/**
62+
* Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions
63+
*
64+
* @param string $xpathExpression the expression to check. Should be a valid xpath expression
65+
* @param string[] $allowedFunctions array of string with a list of allowed function names
66+
* @throws \SimpleSAML\XML\Exception\RuntimeException
67+
*/
68+
public static function filterXPathFunction(string $xpathExpression, array $allowedFunctions): void
69+
{
70+
/**
71+
* Look for the function specifier '(' and look for a function name before it.
72+
* Ignoring whitespace before the '(' and the function name.
73+
* All functions must match a string on a list of allowed function names
74+
*/
75+
$matches = [];
76+
$res = preg_match_all(
77+
/**
78+
* Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
79+
* but cannot start or end with a hyphen. To match this, we start with matching one or more
80+
* lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
81+
* and then match one or more lower-case alpha characters. This ensures that the function name
82+
* cannot start or end with a hyphen, but can contain one or more hyphens.
83+
* More than one consecutive hyphen does not match.
84+
*
85+
* Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
86+
* and thus prevent a ReDOS.
87+
88+
* '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
89+
* ( # Start a capturing group
90+
* [a-z]++ # Match one or more lower-case alpha characters
91+
* (?> # Start an atomic group (no capturing)
92+
* - # Match a hyphen
93+
* [a-z]++ # Match one or more lower-case alpha characters, possessively
94+
* )*+ # Repeat the atomic group zero or more times,
95+
* ) # End of the capturing group
96+
* \s*+ # Match zero or more whitespace characters, possessively
97+
* \( # Match an opening parenthesis
98+
*/
99+
100+
'/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/',
101+
$xpathExpression,
102+
$matches,
103+
);
104+
105+
// Check that all the function names we found are in the list of allowed function names
106+
foreach ($matches[1] as $match) {
107+
if (!in_array($match, $allowedFunctions)) {
108+
throw new RuntimeException("Invalid function: '" . $match . "'");
109+
}
110+
}
111+
}
112+
113+
114+
/**
115+
* Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes
116+
*
117+
* @param string $xpathExpression the expression to check. Should be a valid xpath expression
118+
* @param string[] $allowedAxes array of string with a list of allowed axes names
119+
* @throws \SimpleSAML\XML\Exception\RuntimeException
120+
*/
121+
public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void
122+
{
123+
/**
124+
* Look for the axis specifier '::' and look for a function name before it.
125+
* Ignoring whitespace before the '::' and the axis name.
126+
* All axes must match a string on a list of allowed axis names
127+
*/
128+
$matches = [];
129+
$res = preg_match_all(
130+
/**
131+
* We use the same rules for matching Axis names as we do for function names.
132+
* The only difference is that we match the '::' instead of the '('
133+
* so everything that was said about the regular expression for function names
134+
* applies here as well.
135+
*
136+
* Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
137+
* and thus prevent a ReDOS.
138+
*
139+
* '/([a-z]++(?>-[a-z]++)*+)\s*+::'
140+
* ( # Start a capturing group
141+
* [a-z]++ # Match one or more lower-case alpha characters
142+
* (?> # Start an atomic group (no capturing)
143+
* - # Match a hyphen
144+
* [a-z]++ # Match one or more lower-case alpha characters, possessively
145+
* )*+ # Repeat the atomic group zero or more times,
146+
* ) # End of the capturing group
147+
* \s*+ # Match zero or more whitespace characters, possessively
148+
* \( # Match an opening parenthesis
149+
*/
150+
151+
'/([a-z]++(?>-[a-z]++)*+)\\s*+::/',
152+
$xpathExpression,
153+
$matches,
154+
);
155+
156+
// Check that all the axes names we found are in the list of allowed axes names
157+
foreach ($matches[1] as $match) {
158+
if (!in_array($match, $allowedAxes)) {
159+
throw new RuntimeException("Invalid axis: '" . $match . "'");
160+
}
161+
}
162+
}
163+
}

0 commit comments

Comments
 (0)