Skip to content

Commit db309f8

Browse files
committed
Add assertion to test XPath filters against an allow-list for axes and functions
1 parent 3ace522 commit db309f8

File tree

3 files changed

+400
-12
lines changed

3 files changed

+400
-12
lines changed

src/Assert/Assert.php

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Assert;
6+
7+
use BadMethodCallException; // Requires ext-spl
8+
use DateTime; // requires ext-date
9+
use DateTimeImmutable; // requires ext-date
10+
use InvalidArgumentException; // Requires ext-spl
11+
use SimpleSAML\Assert\Assert as BaseAssert;
12+
use SimpleSAML\Assert\AssertionFailedException;
13+
use Throwable;
14+
15+
use function array_pop;
16+
use function array_unshift;
17+
use function call_user_func_array;
18+
use function end;
19+
use function enum_exists;
20+
use function function_exists;
21+
use function get_class;
22+
use function is_object;
23+
use function is_resource;
24+
use function is_string;
25+
use function is_subclass_of;
26+
use function lcfirst;
27+
use function method_exists;
28+
use function preg_match; // Requires ext-pcre
29+
use function strval;
30+
31+
/**
 * SimpleSAML\XML\Assert\Assert wrapper class
 *
 * Dispatches static assertion calls to the custom assertions pulled in through CustomAssertionTrait,
 * including their `nullOr*` and `all*` variants (which may also resolve to assertions on the base Assert class),
 * and re-throws assertion failures using an exception class the caller may select.
 *
 * @package simplesamlphp/xml-common
 *
 * @method static void allowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
 * @method static void nullOrAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
 * @method static void allAllowedXPathFilter(mixed $value, array $allowed_axes, array $allowed_functions, string $message = '', string $exception = '')
 */
final class Assert
{
    use CustomAssertionTrait;


    /**
     * Dispatch a static call to an assertion.
     *
     * When the last argument is a string naming an existing class that implements Throwable, it is removed
     * from the argument list and used as the exception class thrown on failure; otherwise
     * AssertionFailedException is used.
     *
     * Resolution order for $name:
     *  1. a method with exactly that name on this class;
     *  2. `nullOr<Assertion>`: the assertion is looked up on this class first, then on the base Assert class;
     *  3. `all<Assertion>`: same lookup as for `nullOr`.
     *
     * @param string $name
     * @param array<mixed> $arguments
     *
     * @throws BadMethodCallException when no matching assertion can be found
     * @throws Throwable an instance of the selected exception class when the assertion
     *   raises an InvalidArgumentException
     */
    public static function __callStatic(string $name, array $arguments): void
    {
        // Handle Exception-parameter
        $exception = AssertionFailedException::class;

        $last = end($arguments);
        if (is_string($last) && class_exists($last) && is_subclass_of($last, Throwable::class)) {
            $exception = $last;
            array_pop($arguments);
        }

        try {
            if (method_exists(static::class, $name)) {
                call_user_func_array([static::class, $name], $arguments);
                return;
            } elseif (preg_match('/^nullOr(.*)$/i', $name, $matches)) {
                $method = lcfirst($matches[1]);
                if (method_exists(static::class, $method)) {
                    call_user_func_array([static::class, 'nullOr'], [[static::class, $method], $arguments]);
                } elseif (method_exists(BaseAssert::class, $method)) {
                    call_user_func_array([static::class, 'nullOr'], [[BaseAssert::class, $method], $arguments]);
                } else {
                    throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $method));
                }
            } elseif (preg_match('/^all(.*)$/i', $name, $matches)) {
                $method = lcfirst($matches[1]);
                if (method_exists(static::class, $method)) {
                    call_user_func_array([static::class, 'all'], [[static::class, $method], $arguments]);
                } elseif (method_exists(BaseAssert::class, $method)) {
                    call_user_func_array([static::class, 'all'], [[BaseAssert::class, $method], $arguments]);
                } else {
                    throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $method));
                }
            } else {
                throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $name));
            }
        } catch (InvalidArgumentException $e) {
            // Only the message is handed over: the selected class is instantiated with a single argument.
            throw new $exception($e->getMessage());
        }
    }


    /**
     * Handle nullOr* for either the base assertions or for our custom assertions.
     *
     * The first argument is the value under test; when it is null the assertion is skipped,
     * otherwise $method is invoked with the complete argument list.
     *
     * @param callable $method
     * @param array<mixed> $arguments
     * @return void
     */
    private static function nullOr(callable $method, array $arguments): void
    {
        $value = reset($arguments);
        ($value === null) || call_user_func_array($method, $arguments);
    }


    /**
     * Handle all* for either the base assertions or for our custom assertions.
     *
     * The first argument is the collection under test. $method is invoked once per element, with the
     * element as first argument followed by the remaining arguments unchanged.
     *
     * @param callable $method
     * @param array<mixed> $arguments
     * @return void
     *
     * @throws InvalidArgumentException when the first argument is not iterable
     */
    private static function all(callable $method, array $arguments): void
    {
        // The collection is the FIRST argument (each element is put back in front below), so it must be
        // shifted off the front. Popping the last argument would iterate e.g. the message or an allow-list.
        $values = array_shift($arguments);

        // A non-iterable value would make foreach emit a warning and let the assertion pass silently.
        if (!is_iterable($values)) {
            throw new InvalidArgumentException(sprintf(
                'Expected an iterable. Got: %s',
                self::valueToString($values),
            ));
        }

        foreach ($values as $value) {
            $tmp = $arguments;
            array_unshift($tmp, $value);
            call_user_func_array($method, $tmp);
        }
    }


    /**
     * Render a value as a short string suitable for use in an assertion message.
     *
     * Resources, null, booleans and arrays are rendered as a fixed keyword. Objects are rendered as their
     * class name, extended with their string representation (objects with __toString), their ISO-8601
     * representation (DateTime / DateTimeImmutable) or their case name (enums) where available.
     * Strings are wrapped in double quotes; anything else is converted with strval().
     *
     * @param mixed $value
     *
     * @return string
     */
    protected static function valueToString(mixed $value): string
    {
        if (is_resource($value)) {
            return 'resource';
        }

        if (null === $value) {
            return 'null';
        }

        if (true === $value) {
            return 'true';
        }

        if (false === $value) {
            return 'false';
        }

        if (is_array($value)) {
            return 'array';
        }

        if (is_object($value)) {
            if (method_exists($value, '__toString')) {
                return $value::class . ': ' . self::valueToString($value->__toString());
            }

            if ($value instanceof DateTime || $value instanceof DateTimeImmutable) {
                return $value::class . ': ' . self::valueToString($value->format('c'));
            }

            // enum_exists() is only available from PHP 8.1 onwards
            if (function_exists('enum_exists') && enum_exists(get_class($value))) {
                return get_class($value) . '::' . $value->name;
            }

            return $value::class;
        }

        if (is_string($value)) {
            return '"' . $value . '"';
        }

        return strval($value);
    }
}
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace SimpleSAML\XML\Assert;
6+
7+
use InvalidArgumentException;
8+
use SimpleSAML\Assert\Assert as BaseAssert;
9+
use SimpleSAML\XML\Constants as C;
10+
11+
use function in_array;
12+
use function preg_match_all;
13+
use function preg_replace;
14+
use function sprintf;
15+
use function strlen;
16+
17+
/**
 * Custom assertions for XML-related values.
 *
 * @package simplesamlphp/xml-common
 */
trait CustomAssertionTrait
{
    /**
     * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
     * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
     *
     * '/(["\'])(?:(?!\1).)*+\1/'
     *  (["\'])  # Match a single or double quote and capture it in group 1
     *  (?:      # Start a non-capturing group
     *    (?!    # Negative lookahead
     *      \1   # Match the same quote as in group 1
     *    )      # End of negative lookahead
     *    .      # Match any character (that is not a quote, because of the negative lookahead)
     *  )*+      # Repeat the non-capturing group zero or more times, possessively
     *  \1       # Match the same quote as in group 1
     */
    private static string $regex_xpfilter_remove_strings = '/(["\'])(?:(?!\1).)*+\1/';

    /**
     * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
     * but cannot start or end with a hyphen. To match this, we start with matching one or more
     * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
     * and then match one or more lower-case alpha characters. This ensures that the function name
     * cannot start or end with a hyphen, but can contain one or more hyphens.
     * More than one consecutive hyphen does not match.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
     *  (           # Start a capturing group
     *    [a-z]++   # Match one or more lower-case alpha characters, possessively
     *    (?>       # Start an atomic group (no capturing)
     *      -       # Match a hyphen
     *      [a-z]++ # Match one or more lower-case alpha characters, possessively
     *    )*+       # Repeat the atomic group zero or more times, possessively
     *  )           # End of the capturing group
     *  \s*+        # Match zero or more whitespace characters, possessively
     *  \(          # Match an opening parenthesis
     */
    private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/';

    /**
     * We use the same rules for matching axis names as we do for function names.
     * The only difference is that we match the '::' instead of the '('
     * so everything that was said about the regular expression for function names
     * applies here as well.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
     *  (           # Start a capturing group
     *    [a-z]++   # Match one or more lower-case alpha characters, possessively
     *    (?>       # Start an atomic group (no capturing)
     *      -       # Match a hyphen
     *      [a-z]++ # Match one or more lower-case alpha characters, possessively
     *    )*+       # Repeat the atomic group zero or more times, possessively
     *  )           # End of the capturing group
     *  \s*+        # Match zero or more whitespace characters, possessively
     *  ::          # Match the axis specifier (two colons)
     */
    private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/';


    /***********************************************************************************
     *  NOTE:  Custom assertions may be added below this line.                         *
     *         They SHOULD be marked as `private` to ensure the call is forced         *
     *          through __callStatic().                                                *
     *         Assertions marked `public` are called directly and will                 *
     *          not handle any custom exception passed to it.                          *
     ***********************************************************************************/

    /**
     * Check an XPath expression for allowed axes and functions
     * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
     * a select subset of functions and axes.
     * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
     * or axis is found in $value.
     *
     * The check is performed in this order: maximum length, functions, axes. Quoted string literals are
     * emptied before functions and axes are looked up, so their content is never mistaken for either.
     *
     * Limitations:
     * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
     *   evaluate all possible valid XPath expressions correctly and cause either false positives for valid
     *   expressions or false negatives for invalid expressions.
     * - The check may still allow expressions that are not safe, i.e. expressions that consist of only
     *   functions and axes that are deemed "safe", but that are still slow to evaluate. The time it takes to
     *   evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document.
     *   This check, however, does not take the XML document into account, nor is it aware of the internals of the
     *   XPath processor that will evaluate the expression.
     * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
     *
     * @param string $value The XPath expression to check
     * @param array $allowed_axes Names of the axes that may be used in the expression
     * @param array $allowed_functions Names of the functions that may be used in the expression
     * @param string $message Optional sprintf() template for the failure message; it receives the
     *   offending function or axis name as its only argument
     *
     * @throws \InvalidArgumentException when a function or axis is used that is not on its allow-list
     * @throws \RuntimeException when one of the regular expressions fails to execute
     */
    private static function allowedXPathFilter(
        string $value,
        array $allowed_axes = C::DEFAULT_ALLOWED_AXES,
        array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS,
        string $message = '',
    ): void {
        BaseAssert::maxLength(
            $value,
            C::XPATH_FILTER_MAX_LENGTH,
            // Report the limit that is actually enforced instead of a hard-coded number
            sprintf('XPath Filter exceeds the limit of %d characters.', C::XPATH_FILTER_MAX_LENGTH),
        );

        $strippedValue = preg_replace(
            self::$regex_xpfilter_remove_strings,
            // Replace the content with two of the quotes that were matched
            "\\1\\1",
            $value,
        );

        if ($strippedValue === null) {
            // Fully qualified: an unqualified `Exception` would resolve inside this namespace, where no
            // such class exists.
            throw new \RuntimeException("Error in preg_replace.");
        }

        /**
         * Check if $value uses an XPath function that is not in the list of allowed functions
         *
         * Look for the function specifier '(' and look for a function name before it.
         * Ignoring whitespace before the '(' and the function name.
         * All functions must match a string on a list of allowed function names
         */
        self::assertXPathNamesAllowed(
            self::$regex_xpfilter_functions,
            $strippedValue,
            $allowed_functions,
            $message ?: '\'%s\' is not an allowed XPath function.',
        );

        /**
         * Check if $value uses an XPath axis that is not in the list of allowed axes
         *
         * Look for the axis specifier '::' and look for an axis name before it.
         * Ignoring whitespace before the '::' and the axis name.
         * All axes must match a string on a list of allowed axis names
         */
        self::assertXPathNamesAllowed(
            self::$regex_xpfilter_axes,
            $strippedValue,
            $allowed_axes,
            $message ?: '\'%s\' is not an allowed XPath axis.',
        );
    }


    /**
     * Find every name captured by $regex in $subject and reject the first one that is not allow-listed.
     *
     * @param string $regex PCRE pattern whose first capturing group yields the names to check
     * @param string $subject The text to search, with the content of quoted strings already removed
     * @param array $allowed The names that are allowed
     * @param string $message sprintf() template that receives the offending name as its only argument
     *
     * @throws \InvalidArgumentException when a captured name is not in $allowed
     * @throws \RuntimeException when preg_match_all() fails
     */
    private static function assertXPathNamesAllowed(
        string $regex,
        string $subject,
        array $allowed,
        string $message,
    ): void {
        $matches = [];
        $res = preg_match_all($regex, $subject, $matches);

        // preg_match_all() signals failure with false (never null). Failing here keeps the check
        // fail-closed: without it a PCRE error would yield no matches and the filter would be accepted.
        if ($res === false) {
            throw new \RuntimeException("Error in preg_match_all.");
        }

        foreach ($matches[1] as $match) {
            // Strict comparison, so a non-string entry on the allow-list can never loosely equal a name
            if (!in_array($match, $allowed, true)) {
                throw new InvalidArgumentException(sprintf($message, $match));
            }
        }
    }
}

0 commit comments

Comments
 (0)