44
55namespace SimpleSAML \XML \Assert ;
66
7+ use InvalidArgumentException ;
78use SimpleSAML \Assert \Assert as BaseAssert ;
89use SimpleSAML \Assert \AssertionFailedException ;
910use SimpleSAML \XML \Constants as C ;
1011use SimpleSAML \XML \Exception \RuntimeException ;
12+ use SimpleSAML \XML \Utils \XPathFilter ;
1113
12- use function in_array ;
13- use function preg_match_all ;
14- use function preg_replace ;
1514use function sprintf ;
1615
1716/**
1817 * @package simplesamlphp/xml-common
1918 */
2019trait XPathFilterTrait
2120{
22- /**
23- * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
24- * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
25- *
26- * '/(["\'])(?:(?!\1).)*+\1/'
27- * (["\']) # Match a single or double quote and capture it in group 1
28- * (?: # Start a non-capturing group
29- * (?! # Negative lookahead
30- * \1 # Match the same quote as in group 1
31- * ) # End of negative lookahead
32- * . # Match any character (that is not a quote, because of the negative lookahead)
33- * )*+ # Repeat the non-capturing group zero or more times, possessively
34- * \1 # Match the same quote as in group 1
35- */
36- private static string $ regex_xpfilter_remove_strings = '/([" \'])(?:(?!\1).)*+\1/ ' ;
37-
38- /**
39- * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
40- * but cannot start or end with a hyphen. To match this, we start with matching one or more
41- * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
42- * and then match one or more lower-case alpha characters. This ensures that the function name
43- * cannot start or end with a hyphen, but can contain one or more hyphens.
44- * More than one consecutive hyphen does not match.
45- *
46- * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
47- * ( # Start a capturing group
48- * [a-z]++ # Match one or more lower-case alpha characters
49- * (?> # Start an atomic group (no capturing)
50- * - # Match a hyphen
51- * [a-z]++ # Match one or more lower-case alpha characters, possessively
52- * )*+ # Repeat the atomic group zero or more times,
53- * ) # End of the capturing group
54- * \s*+ # Match zero or more whitespace characters, possessively
55- * \( # Match an opening parenthesis
56- */
57- private static string $ regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+) \\s*+ \\(/ ' ;
58-
59- /**
60- * We use the same rules for matching Axis names as we do for function names.
61- * The only difference is that we match the '::' instead of the '('
62- * so everything that was said about the regular expression for function names
63- * applies here as well.
64- *
65- * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
66- * ( # Start a capturing group
67- * [a-z]++ # Match one or more lower-case alpha characters
68- * (?> # Start an atomic group (no capturing)
69- * - # Match a hyphen
70- * [a-z]++ # Match one or more lower-case alpha characters, possessively
71- * )*+ # Repeat the atomic group zero or more times,
72- * ) # End of the capturing group
73- * \s*+ # Match zero or more whitespace characters, possessively
74- * :: # Match the axis specifier '::'
75- */
76- private static string $ regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+) \\s*+::/ ' ;
77-
78-
7921 /***********************************************************************************
8022 * NOTE: Custom assertions may be added below this line. *
8123 * They SHOULD be marked as `private` to ensure the call is forced *
@@ -89,7 +31,7 @@ trait XPathFilterTrait
8931 * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
9032 * a select subset of functions and axes.
9133 * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
92- * or axis is found in the $value .
34+ * or axis is found in the $xpathExpression .
9335 *
9436 * Limitations:
9537 * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
@@ -102,107 +44,37 @@ trait XPathFilterTrait
10244 * XPath processor that will evaluate the expression.
10345 * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
10446 *
105- * @param string $value
47+ * @param string $xpathExpression
10648 * @param array<string> $allowedAxes
10749 * @param array<string> $allowedFunctions
10850 * @param string $message
10951 */
public static function validAllowedXPathFilter(
    string $xpathExpression,
    array $allowedAxes = C::DEFAULT_ALLOWED_AXES,
    array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS,
    string $message = '',
): void {
    // Both allow-lists must consist solely of strings before they are used for filtering.
    BaseAssert::allString($allowedAxes);
    BaseAssert::allString($allowedFunctions);

    // Bound the length of the expression before any further inspection.
    // The limit quoted in the message is taken from the same constant that is enforced,
    // so the text cannot drift away from the actual limit.
    BaseAssert::maxLength(
        $xpathExpression,
        C::XPATH_FILTER_MAX_LENGTH,
        sprintf('XPath Filter exceeds the limit of %d characters.', C::XPATH_FILTER_MAX_LENGTH),
    );

    try {
        // First remove the contents of any string literals in the expression to prevent false positives
        $xpathWithoutStringLiterals = XPathFilter::removeStringContents($xpathExpression);

        // Then check that the expression only contains allowed functions and axes; throws when it doesn't
        XPathFilter::filterXPathFunction($xpathWithoutStringLiterals, $allowedFunctions);
        XPathFilter::filterXPathAxis($xpathWithoutStringLiterals, $allowedAxes);
    } catch (RuntimeException $e) {
        // A caller-supplied $message is a sprintf() template that receives the offending expression.
        // The exception's own message is already final text and must NOT be used as a format string:
        // a literal '%' in it would garble the output or make sprintf() throw.
        $failureText = $message ? sprintf($message, $xpathExpression) : $e->getMessage();

        // Keep the original exception as `previous` so the root cause stays visible in traces.
        throw new InvalidArgumentException($failureText, 0, $e);
    }
}
20880}
0 commit comments