Skip to content

Commit ae8f884

Browse files
committed
Add assertion to test XPath filters against an allow-list for axes and functions
1 parent 3ace522 commit ae8f884

File tree

3 files changed

+401
-12
lines changed

3 files changed

+401
-12
lines changed

src/Assert/Assert.php

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Assert;
6+
7+
use BadMethodCallException; // Requires ext-spl
8+
use DateTime; // requires ext-date
9+
use DateTimeImmutable; // requires ext-date
10+
use InvalidArgumentException; // Requires ext-spl
11+
use SimpleSAML\Assert\Assert as BaseAssert;
12+
use SimpleSAML\Assert\AssertionFailedException;
13+
use Throwable;
14+
15+
use function array_pop;
16+
use function array_unshift;
17+
use function call_user_func_array;
18+
use function end;
19+
use function enum_exists;
20+
use function function_exists;
21+
use function get_class;
22+
use function is_object;
23+
use function is_resource;
24+
use function is_string;
25+
use function is_subclass_of;
26+
use function lcfirst;
27+
use function method_exists;
28+
use function preg_match; // Requires ext-pcre
29+
use function strval;
30+
31+
/**
32+
* SimpleSAML\XML\Assert\Assert wrapper class
33+
*
34+
* @package simplesamlphp/xml-common
35+
*
36+
* @method static void allowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
37+
* @method static void nullOrAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
38+
* @method static void allAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
39+
*/
40+
final class Assert
41+
{
42+
use CustomAssertionTrait;
43+
44+
45+
/**
 * Dispatch a static call to an assertion that is not directly callable from outside.
 *
 * If the last argument is a string naming an existing class that is a subclass of Throwable,
 * it is removed from the argument list and used as the exception type for failed assertions;
 * otherwise AssertionFailedException is used.
 *
 * Lookup order:
 *  1. a method called $name on this class;
 *  2. `nullOr<Assertion>`: the assertion is looked up on this class first, then on the base Assert class;
 *  3. `all<Assertion>`: same lookup as for `nullOr`.
 *
 * @param string $name
 * @param array<mixed> $arguments
 *
 * @throws \BadMethodCallException When no matching assertion can be found.
 */
public static function __callStatic(string $name, array $arguments): void
{
    // Handle Exception-parameter
    $exception = AssertionFailedException::class;

    $candidate = end($arguments);
    if (is_string($candidate) && class_exists($candidate) && is_subclass_of($candidate, Throwable::class)) {
        $exception = $candidate;
        array_pop($arguments);
    }

    try {
        // An assertion defined on this class is called as-is
        if (method_exists(static::class, $name)) {
            call_user_func_array([static::class, $name], $arguments);
            return;
        }

        // Otherwise the name must carry one of the supported prefixes
        $matches = [];
        if (preg_match('/^nullOr(.*)$/i', $name, $matches)) {
            $wrapper = 'nullOr';
        } elseif (preg_match('/^all(.*)$/i', $name, $matches)) {
            $wrapper = 'all';
        } else {
            throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $name));
        }

        // Our own assertions take precedence over the ones from the base class
        $method = lcfirst($matches[1]);
        foreach ([static::class, BaseAssert::class] as $owner) {
            if (method_exists($owner, $method)) {
                call_user_func_array([static::class, $wrapper], [[$owner, $method], $arguments]);
                return;
            }
        }

        throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $method));
    } catch (InvalidArgumentException $e) {
        // Re-throw failed assertions as the exception type selected above
        throw new $exception($e->getMessage());
    }
}
89+
90+
91+
/**
 * Run an assertion unless the value under test is null.
 *
 * The value under test is the first element of $arguments. When it is null the assertion
 * is skipped; in every other case $method is invoked with the complete argument list.
 *
 * @param callable $method The assertion to run
 * @param array<mixed> $arguments The arguments for the assertion, value under test first
 * @return void
 */
private static function nullOr(callable $method, array $arguments): void
{
    $subject = reset($arguments);
    if ($subject === null) {
        return;
    }

    call_user_func_array($method, $arguments);
}
103+
104+
105+
/**
 * Run an assertion against every element of a collection.
 *
 * The collection is the FIRST element of $arguments (the position of the value under test
 * in every assertion signature). Any remaining elements are passed unchanged, and in the
 * same order, to each individual call of $method, right after the element being tested.
 *
 * @param callable $method The assertion to run for each element
 * @param array<mixed> $arguments The collection, followed by any extra arguments for the assertion
 * @return void
 *
 * @throws \InvalidArgumentException When the first argument is missing or cannot be iterated.
 */
private static function all(callable $method, array $arguments): void
{
    // Take the collection from the front; popping from the end would pick up a trailing
    // option (e.g. an allow-list or message) whenever more than one argument is passed.
    $values = array_shift($arguments);

    // Iterating a non-iterable only raises a warning, which would let the assertion pass silently.
    if (!is_iterable($values)) {
        throw new InvalidArgumentException(
            sprintf('Expected an iterable. Got: %s', get_debug_type($values)),
        );
    }

    foreach ($values as $value) {
        $tmp = $arguments;
        array_unshift($tmp, $value);
        call_user_func_array($method, $tmp);
    }
}
121+
122+
123+
/**
 * Produce a short, human-readable representation of a value.
 *
 * Objects are rendered by class name, followed by their string value (for objects with a
 * __toString() method), their ISO 8601 date (for DateTime and DateTimeImmutable) or their
 * case name (for enums). Strings are wrapped in double quotes, arrays and resources are
 * rendered as their type name, and null, true and false as the matching keyword.
 *
 * @param mixed $value
 *
 * @return string
 */
protected static function valueToString(mixed $value): string
{
    if (is_object($value)) {
        $class = get_class($value);

        if (method_exists($value, '__toString')) {
            return $class . ': ' . self::valueToString($value->__toString());
        }

        if ($value instanceof DateTime || $value instanceof DateTimeImmutable) {
            return $class . ': ' . self::valueToString($value->format('c'));
        }

        if (function_exists('enum_exists') && enum_exists($class)) {
            return $class . '::' . $value->name;
        }

        return $class;
    }

    return match (true) {
        is_resource($value) => 'resource',
        $value === null => 'null',
        $value === true => 'true',
        $value === false => 'false',
        is_array($value) => 'array',
        is_string($value) => '"' . $value . '"',
        default => strval($value),
    };
}
172+
}
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Assert;
6+
7+
use InvalidArgumentException;
8+
use SimpleSAML\Assert\Assert as BaseAssert;
9+
use SimpleSAML\XML\Constants as C;
10+
11+
use function in_array;
12+
use function preg_match_all;
13+
use function preg_replace;
14+
use function sprintf;
15+
16+
/**
17+
* @package simplesamlphp/xml-common
18+
*/
19+
trait CustomAssertionTrait
20+
{
21+
/**
 * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
 * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
 *
 * '/(["\'])(?:(?!\1).)*+\1/s'
 *   (["\'])  # Match a single or double quote and capture it in group 1
 *   (?:      # Start a non-capturing group
 *     (?!    # Negative lookahead
 *       \1   # Match the same quote as in group 1
 *     )      # End of negative lookahead
 *     .      # Match any character (that is not a quote, because of the negative lookahead)
 *   )*+      # Repeat the non-capturing group zero or more times, possessively
 *   \1       # Match the same quote as in group 1
 *   /s       # DOTALL: let '.' match newlines too. Without it a string literal that contains a
 *            # newline is not matched at its opening quote, matching resumes at its closing quote
 *            # and the expression text up to the next quote is removed instead of the literal.
 */
private static string $regex_xpfilter_remove_strings = '/(["\'])(?:(?!\1).)*+\1/s';
36+
37+
/**
38+
* Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
39+
* but cannot start or end with a hyphen. To match this, we start with matching one or more
40+
* lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
41+
* and then match one or more lower-case alpha characters. This ensures that the function name
42+
* cannot start or end with a hyphen, but can contain one or more hyphens.
43+
* More than one consecutive hyphen does not match.
44+
*
45+
* '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
46+
* ( # Start a capturing group
47+
* [a-z]++ # Match one or more lower-case alpha characters
48+
* (?> # Start an atomic group (no capturing)
49+
* - # Match a hyphen
50+
* [a-z]++ # Match one or more lower-case alpha characters, possessively
51+
*     )*+              # Repeat the atomic group zero or more times, possessively
52+
* ) # End of the capturing group
53+
* \s*+ # Match zero or more whitespace characters, possessively
54+
* \( # Match an opening parenthesis
55+
*/
56+
private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/';
57+
58+
/**
59+
* We use the same rules for matching Axis names as we do for function names.
60+
* The only difference is that we match the '::' instead of the '('
61+
* so everything that was said about the regular expression for function names
62+
* applies here as well.
63+
*
64+
*   '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
65+
* ( # Start a capturing group
66+
* [a-z]++ # Match one or more lower-case alpha characters
67+
* (?> # Start an atomic group (no capturing)
68+
* - # Match a hyphen
69+
* [a-z]++ # Match one or more lower-case alpha characters, possessively
70+
*     )*+              # Repeat the atomic group zero or more times, possessively
71+
* ) # End of the capturing group
72+
* \s*+ # Match zero or more whitespace characters, possessively
73+
*     ::               # Match the axis specifier '::'
74+
*/
75+
private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/';
76+
77+
78+
/***********************************************************************************
79+
* NOTE: Custom assertions may be added below this line. *
80+
* They SHOULD be marked as `private` to ensure the call is forced *
81+
* through __callStatic(). *
82+
* Assertions marked `public` are called directly and will *
83+
* not handle any custom exception passed to it. *
84+
***********************************************************************************/
85+
86+
/**
 * Check an XPath expression for allowed axes and functions.
 *
 * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
 * a select subset of functions and axes.
 * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
 * or axis is found in $value.
 *
 * Limitations:
 *  - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
 *    evaluate all possible valid XPath expressions correctly and cause either false positives for valid
 *    expressions or false negatives for invalid expressions.
 *  - The check may still allow expressions that are not safe, i.e. expressions that consist of only
 *    functions and axes that are deemed "safe", but that are still slow to evaluate. The time it takes to
 *    evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document.
 *    This check, however, does not take the XML document into account, nor is it aware of the internals of the
 *    XPath processor that will evaluate the expression.
 *  - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
 *
 * @param string $value The XPath expression to check
 * @param array<string> $allowed_axes The axis names that may appear before '::'
 * @param array<string> $allowed_functions The function names that may appear before '('
 * @param string $message Optional sprintf() template for the failure message; receives the offending name
 *
 * @throws \InvalidArgumentException When $value uses a function or an axis that is not on its allow-list.
 * @throws \RuntimeException When one of the regular expressions cannot be evaluated.
 */
private static function allowedXPathFilter(
    string $value,
    array $allowed_axes = C::DEFAULT_ALLOWED_AXES,
    array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS,
    string $message = '',
): void {
    BaseAssert::allString($allowed_axes);
    BaseAssert::allString($allowed_functions);
    BaseAssert::maxLength(
        $value,
        C::XPATH_FILTER_MAX_LENGTH,
        // Report the configured limit, so the message cannot drift from the constant
        sprintf('XPath Filter exceeds the limit of %d characters.', C::XPATH_FILTER_MAX_LENGTH),
    );

    // Empty all string literals first, so that their content cannot be mistaken for a function or an axis
    $strippedValue = preg_replace(
        self::$regex_xpfilter_remove_strings,
        // Replace the content with two of the quotes that were matched
        "\\1\\1",
        $value,
    );

    if ($strippedValue === null) {
        // Fully qualified: an unqualified class name would be resolved inside this namespace
        throw new \RuntimeException("Error in preg_replace.");
    }

    /**
     * Each check looks for a specifier ('(' for functions, '::' for axes) and captures the name before it,
     * ignoring whitespace between the name and the specifier. Every captured name must be on the
     * matching allow-list. Functions are checked before axes.
     */
    $checks = [
        [self::$regex_xpfilter_functions, $allowed_functions, '\'%s\' is not an allowed XPath function.'],
        [self::$regex_xpfilter_axes, $allowed_axes, '\'%s\' is not an allowed XPath axis.'],
    ];

    foreach ($checks as [$regex, $allowed, $defaultMessage]) {
        $matches = [];
        if (preg_match_all($regex, $strippedValue, $matches) === false) {
            throw new \RuntimeException("Error in preg_match_all.");
        }

        // Check that all the names we found are in the list of allowed names
        foreach ($matches[1] as $match) {
            if (!in_array($match, $allowed, true)) {
                throw new InvalidArgumentException(sprintf($message ?: $defaultMessage, $match));
            }
        }
    }
}
180+
}

0 commit comments

Comments
 (0)