Skip to content

Commit e1c418d

Browse files
authored
XPath: Flatten the AST to make it more flexible (servo#39800)
The current XPath AST is tightly tied to the parser grammar. To address issues like servo#39739, we'll want to do optimization passes on the AST in the future, which requires relaxing its structure first. This PR moves the AST structure into a separate module and flattens it into a shape where most expressions are variants of the core `Expression` enum. This also has the nice side effect of being a net reduction in LoC and making the AST *significantly* easier to read. I think the difference between the old and new structure is best explained by example. One of our unit tests parses `concat('hello', ' ', 'world')`, and the current AST looks like this: ```rust Expr::Path(PathExpr { is_absolute: false, is_descendant: false, steps: vec![StepExpr::Filter(FilterExpr { primary: PrimaryExpr::Function(CoreFunction::Concat(vec![ Expr::Path(PathExpr { is_absolute: false, is_descendant: false, steps: vec![StepExpr::Filter(FilterExpr { primary: PrimaryExpr::Literal(Literal::String( "hello".to_string(), )), predicates: PredicateListExpr { predicates: vec![] }, })], }), Expr::Path(PathExpr { is_absolute: false, is_descendant: false, steps: vec![StepExpr::Filter(FilterExpr { primary: PrimaryExpr::Literal(Literal::String(" ".to_string())), predicates: PredicateListExpr { predicates: vec![] }, })], }), Expr::Path(PathExpr { is_absolute: false, is_descendant: false, steps: vec![StepExpr::Filter(FilterExpr { primary: PrimaryExpr::Literal(Literal::String( "world".to_string(), )), predicates: PredicateListExpr { predicates: vec![] }, })], }), ])), predicates: PredicateListExpr { predicates: vec![] }, })], }), ``` After this change, the AST looks like this: ```rust Expression::Function(CoreFunction::Concat(vec![ Expression::Literal(Literal::String("hello".to_string())), Expression::Literal(Literal::String(" ".to_string())), Expression::Literal(Literal::String("world".to_string())), ])), ``` Testing: No behaviour change intended, covered by existing tests. 
Part of servo#34527 --------- Signed-off-by: Simon Wülker <[email protected]>
1 parent 6a9ade0 commit e1c418d

File tree

6 files changed

+464
-579
lines changed

6 files changed

+464
-579
lines changed

components/script/dom/xpathexpression.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::rc::Rc;
66

77
use dom_struct::dom_struct;
88
use js::rust::HandleObject;
9-
use xpath::{Error as XPathError, Expr, evaluate_parsed_xpath};
9+
use xpath::{Error as XPathError, Expression, evaluate_parsed_xpath};
1010

1111
use crate::dom::bindings::codegen::Bindings::XPathExpressionBinding::XPathExpressionMethods;
1212
use crate::dom::bindings::codegen::Bindings::XPathNSResolverBinding::XPathNSResolver;
@@ -24,11 +24,11 @@ pub(crate) struct XPathExpression {
2424
reflector_: Reflector,
2525
window: Dom<Window>,
2626
#[no_trace]
27-
parsed_expression: Expr,
27+
parsed_expression: Expression,
2828
}
2929

3030
impl XPathExpression {
31-
fn new_inherited(window: &Window, parsed_expression: Expr) -> XPathExpression {
31+
fn new_inherited(window: &Window, parsed_expression: Expression) -> XPathExpression {
3232
XPathExpression {
3333
reflector_: Reflector::new(),
3434
window: Dom::from_ref(window),
@@ -40,7 +40,7 @@ impl XPathExpression {
4040
window: &Window,
4141
proto: Option<HandleObject>,
4242
can_gc: CanGc,
43-
parsed_expression: Expr,
43+
parsed_expression: Expression,
4444
) -> DomRoot<XPathExpression> {
4545
reflect_dom_object_with_proto(
4646
Box::new(XPathExpression::new_inherited(window, parsed_expression)),

components/xpath/src/ast.rs

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4+
5+
use malloc_size_of_derive::MallocSizeOf;
6+
7+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
8+
pub enum Expression {
9+
Binary(Box<Expression>, BinaryOperator, Box<Expression>),
10+
Negate(Box<Expression>),
11+
Path(PathExpression),
12+
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#section-Location-Steps>
13+
LocationStep(LocationStepExpression),
14+
Filter(FilterExpression),
15+
Literal(Literal),
16+
Variable(QName),
17+
ContextItem,
18+
/// We only support the built-in core functions.
19+
Function(CoreFunction),
20+
}
21+
22+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
23+
pub enum BinaryOperator {
24+
Or,
25+
And,
26+
Union,
27+
/// `=`
28+
Equal,
29+
/// `!=`
30+
NotEqual,
31+
/// `<`
32+
LessThan,
33+
/// `>`
34+
GreaterThan,
35+
/// `<=`
36+
LessThanOrEqual,
37+
/// `>=`
38+
GreaterThanOrEqual,
39+
/// `+`
40+
Add,
41+
/// `-`
42+
Subtract,
43+
/// `*`
44+
Multiply,
45+
/// `div`
46+
Divide,
47+
/// `mod`
48+
Modulo,
49+
}
50+
51+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
52+
pub struct PathExpression {
53+
/// Whether this is an absolute (as opposed to a relative) path expression.
54+
///
55+
/// Absolute paths always start at the starting node, not the context node.
56+
pub(crate) is_absolute: bool,
57+
/// Whether this expression starts with `//`. If it does, then an implicit
58+
/// `descendant-or-self::node()` step will be added.
59+
pub(crate) has_implicit_descendant_or_self_step: bool,
60+
pub(crate) steps: Vec<Expression>,
61+
}
62+
63+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
64+
pub struct PredicateListExpression {
65+
pub(crate) predicates: Vec<Expression>,
66+
}
67+
68+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
69+
pub struct FilterExpression {
70+
pub(crate) expression: Box<Expression>,
71+
pub(crate) predicates: PredicateListExpression,
72+
}
73+
74+
/// <https://www.w3.org/TR/1999/REC-xpath-19991116/#section-Location-Steps>
75+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
76+
pub struct LocationStepExpression {
77+
pub(crate) axis: Axis,
78+
pub(crate) node_test: NodeTest,
79+
pub(crate) predicate_list: PredicateListExpression,
80+
}
81+
82+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
83+
pub(crate) enum Axis {
84+
Child,
85+
Descendant,
86+
Attribute,
87+
Self_,
88+
DescendantOrSelf,
89+
FollowingSibling,
90+
Following,
91+
Namespace,
92+
Parent,
93+
Ancestor,
94+
PrecedingSibling,
95+
Preceding,
96+
AncestorOrSelf,
97+
}
98+
99+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
100+
pub(crate) enum NodeTest {
101+
Name(QName),
102+
Wildcard,
103+
Kind(KindTest),
104+
}
105+
106+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
107+
pub struct QName {
108+
pub(crate) prefix: Option<String>,
109+
pub(crate) local_part: String,
110+
}
111+
112+
impl std::fmt::Display for QName {
113+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
114+
match &self.prefix {
115+
Some(prefix) => write!(f, "{}:{}", prefix, self.local_part),
116+
None => write!(f, "{}", self.local_part),
117+
}
118+
}
119+
}
120+
121+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
122+
pub(crate) enum KindTest {
123+
PI(Option<String>),
124+
Comment,
125+
Text,
126+
Node,
127+
}
128+
129+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
130+
pub enum Literal {
131+
Integer(i64),
132+
Decimal(f64),
133+
String(String),
134+
}
135+
136+
/// In the DOM we do not support custom functions, so we can enumerate the usable ones
137+
#[derive(Clone, Debug, MallocSizeOf, PartialEq)]
138+
pub enum CoreFunction {
139+
// Node Set Functions
140+
/// last()
141+
Last,
142+
/// position()
143+
Position,
144+
/// count(node-set)
145+
Count(Box<Expression>),
146+
/// id(object)
147+
Id(Box<Expression>),
148+
/// local-name(node-set?)
149+
LocalName(Option<Box<Expression>>),
150+
/// namespace-uri(node-set?)
151+
NamespaceUri(Option<Box<Expression>>),
152+
/// name(node-set?)
153+
Name(Option<Box<Expression>>),
154+
155+
// String Functions
156+
/// string(object?)
157+
String(Option<Box<Expression>>),
158+
/// concat(string, string, ...)
159+
Concat(Vec<Expression>),
160+
/// starts-with(string, string)
161+
StartsWith(Box<Expression>, Box<Expression>),
162+
/// contains(string, string)
163+
Contains(Box<Expression>, Box<Expression>),
164+
/// substring-before(string, string)
165+
SubstringBefore(Box<Expression>, Box<Expression>),
166+
/// substring-after(string, string)
167+
SubstringAfter(Box<Expression>, Box<Expression>),
168+
/// substring(string, number, number?)
169+
Substring(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
170+
/// string-length(string?)
171+
StringLength(Option<Box<Expression>>),
172+
/// normalize-space(string?)
173+
NormalizeSpace(Option<Box<Expression>>),
174+
/// translate(string, string, string)
175+
Translate(Box<Expression>, Box<Expression>, Box<Expression>),
176+
177+
// Number Functions
178+
/// number(object?)
179+
Number(Option<Box<Expression>>),
180+
/// sum(node-set)
181+
Sum(Box<Expression>),
182+
/// floor(number)
183+
Floor(Box<Expression>),
184+
/// ceiling(number)
185+
Ceiling(Box<Expression>),
186+
/// round(number)
187+
Round(Box<Expression>),
188+
189+
// Boolean Functions
190+
/// boolean(object)
191+
Boolean(Box<Expression>),
192+
/// not(boolean)
193+
Not(Box<Expression>),
194+
/// true()
195+
True,
196+
/// false()
197+
False,
198+
/// lang(string)
199+
Lang(Box<Expression>),
200+
}

0 commit comments

Comments
 (0)