44
55namespace SimpleSAML \XML \Assert ;
66
7+ use InvalidArgumentException ;
78use SimpleSAML \Assert \Assert as BaseAssert ;
8- use SimpleSAML \Assert \AssertionFailedException ;
99use SimpleSAML \XML \Constants as C ;
1010use SimpleSAML \XML \Exception \RuntimeException ;
11+ use SimpleSAML \XML \Utils \XPathFilter ;
1112
12- use function in_array ;
13- use function preg_match_all ;
14- use function preg_replace ;
1513use function sprintf ;
1614
1715/**
1816 * @package simplesamlphp/xml-common
1917 */
2018trait XPathFilterTrait
2119{
22- /**
23- * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
24- * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
25- *
26- * '/(["\'])(?:(?!\1).)*+\1/'
27- * (["\']) # Match a single or double quote and capture it in group 1
28- * (?: # Start a non-capturing group
29- * (?! # Negative lookahead
30- * \1 # Match the same quote as in group 1
31- * ) # End of negative lookahead
32- * . # Match any character (that is not a quote, because of the negative lookahead)
33- * )*+ # Repeat the non-capturing group zero or more times, possessively
34- * \1 # Match the same quote as in group 1
35- */
36- private static string $ regex_xpfilter_remove_strings = '/([" \'])(?:(?!\1).)*+\1/ ' ;
37-
38- /**
39- * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
40- * but cannot start or end with a hyphen. To match this, we start with matching one or more
41- * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
42- * and then match one or more lower-case alpha characters. This ensures that the function name
43- * cannot start or end with a hyphen, but can contain one or more hyphens.
44- * More than one consecutive hyphen does not match.
45- *
46- * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
47- * ( # Start a capturing group
48- * [a-z]++ # Match one or more lower-case alpha characters
49- * (?> # Start an atomic group (no capturing)
50- * - # Match a hyphen
51- * [a-z]++ # Match one or more lower-case alpha characters, possessively
52- * )*+ # Repeat the atomic group zero or more times,
53- * ) # End of the capturing group
54- * \s*+ # Match zero or more whitespace characters, possessively
55- * \( # Match an opening parenthesis
56- */
57- private static string $ regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+) \\s*+ \\(/ ' ;
58-
59- /**
60- * We use the same rules for matching Axis names as we do for function names.
61- * The only difference is that we match the '::' instead of the '('
62- * so everything that was said about the regular expression for function names
63- * applies here as well.
64- *
65- * '/([a-z]++(?>-[a-z]++)*+)\s*+::'
66- * ( # Start a capturing group
67- * [a-z]++ # Match one or more lower-case alpha characters
68- * (?> # Start an atomic group (no capturing)
69- * - # Match a hyphen
70- * [a-z]++ # Match one or more lower-case alpha characters, possessively
71- * )*+ # Repeat the atomic group zero or more times,
72- * ) # End of the capturing group
73- * \s*+ # Match zero or more whitespace characters, possessively
74- * \( # Match an opening parenthesis
75- */
76- private static string $ regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+) \\s*+::/ ' ;
77-
78-
7920 /***********************************************************************************
8021 * NOTE: Custom assertions may be added below this line. *
8122 * They SHOULD be marked as `private` to ensure the call is forced *
@@ -89,7 +30,7 @@ trait XPathFilterTrait
8930 * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
9031 * a select subset of functions and axes.
9132 * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
92- * or axis is found in the $value .
33+ * or axis is found in the $xpathExpression .
9334 *
9435 * Limitations:
9536 * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
@@ -102,107 +43,37 @@ trait XPathFilterTrait
10243 * XPath processor that will evaluate the expression.
10344 * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
10445 *
105- * @param string $value
106- * @param array<string> $allowed_axes
107- * @param array<string> $allowed_functions
46+ * @param string $xpathExpression
47+ * @param array<string> $allowedAxes
48+ * @param array<string> $allowedFunctions
10849 * @param string $message
10950 */
11051 public static function validAllowedXPathFilter (
111- string $ value ,
112- array $ allowed_axes = C::DEFAULT_ALLOWED_AXES ,
113- array $ allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS ,
52+ string $ xpathExpression ,
53+ array $ allowedAxes = C::DEFAULT_ALLOWED_AXES ,
54+ array $ allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS ,
11455 string $ message = '' ,
11556 ): void {
116- BaseAssert::allString ($ allowed_axes );
117- BaseAssert::allString ($ allowed_functions );
57+ BaseAssert::allString ($ allowedAxes );
58+ BaseAssert::allString ($ allowedFunctions );
11859 BaseAssert::maxLength (
119- $ value ,
60+ $ xpathExpression ,
12061 C::XPATH_FILTER_MAX_LENGTH ,
12162 sprintf ('XPath Filter exceeds the limit of 100 characters. ' ),
12263 );
12364
124- $ strippedValue = preg_replace (
125- self ::$ regex_xpfilter_remove_strings ,
126- // Replace the content with two of the quotes that were matched
127- "\\1 \\1 " ,
128- $ value ,
129- );
130-
131- if ($ strippedValue === null ) {
132- throw new RuntimeException ("Error in preg_replace. " );
133- }
134-
135- self ::validAllowedXpathFunctions ($ strippedValue , $ allowed_functions );
136- self ::validAllowedXpathAxes ($ strippedValue , $ allowed_axes );
137- }
138-
139-
140- /**
141- * @param string $value
142- * @param array<string> $allowed_functions
143- * @param string $message
144- */
145- public static function validAllowedXPathFunctions (
146- string $ value ,
147- array $ allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS ,
148- string $ message = '' ,
149- ): void {
150- /**
151- * Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions
152- *
153- * Look for the function specifier '(' and look for a function name before it.
154- * Ignoring whitespace before the '(' and the function name.
155- * All functions must match a string on a list of allowed function names
156- */
157- $ matches = [];
158- $ res = preg_match_all (self ::$ regex_xpfilter_functions , $ value , $ matches );
159- if ($ res === false ) {
160- throw new RuntimeException ("Error in preg_match_all. " );
161- }
162-
163- // Check that all the function names we found are in the list of allowed function names
164- foreach ($ matches [1 ] as $ match ) {
165- if (!in_array ($ match , $ allowed_functions )) {
166- throw new AssertionFailedException (sprintf (
167- $ message ?: '\'%s \' is not an allowed XPath function. ' ,
168- $ match ,
169- ));
170- }
171- }
172- }
173-
174-
175- /**
176- * @param string $value
177- * @param array<string> $allowed_axes
178- * @param string $message
179- */
180- public static function validAllowedXPathAxes (
181- string $ value ,
182- array $ allowed_axes = C::DEFAULT_ALLOWED_AXES ,
183- string $ message = '' ,
184- ): void {
185- /**
186- * Check if the $value uses an XPath axis that is not in the list of allowed axes
187- *
188- * Look for the axis specifier '::' and look for a function name before it.
189- * Ignoring whitespace before the '::' and the axis name.
190- * All axes must match a string on a list of allowed axis names
191- */
192- $ matches = [];
193- $ res = preg_match_all (self ::$ regex_xpfilter_axes , $ value , $ matches );
194- if ($ res === false ) {
195- throw new RuntimeException ("Error in preg_match_all. " );
196- }
197-
198- // Check that all the axes names we found are in the list of allowed axes names
199- foreach ($ matches [1 ] as $ match ) {
200- if (!in_array ($ match , $ allowed_axes )) {
201- throw new AssertionFailedException (sprintf (
202- $ message ?: '\'%s \' is not an allowed XPath axis. ' ,
203- $ match ,
204- ));
205- }
65+ try {
66+ // First remove the contents of any string literals in the $xpath to prevent false positives
67+ $ xpathWithoutStringLiterals = XPathFilter::removeStringContents ($ xpathExpression );
68+
69+ // Then check that the xpath expression only contains allowed functions and axes, throws when it doesn't
70+ XPathFilter::filterXPathFunction ($ xpathWithoutStringLiterals , $ allowedFunctions );
71+ XPathFilter::filterXPathAxis ($ xpathWithoutStringLiterals , $ allowedAxes );
72+ } catch (RuntimeException $ e ) {
73+ throw new InvalidArgumentException (sprintf (
74+ $ message ?: $ e ->getMessage (),
75+ $ xpathExpression ,
76+ ));
20677 }
20778 }
20879}
0 commit comments