Skip to content

Commit f78d5d7

Browse files
committed
[XPath] Simplify step optimization and fix a bug with attribute iteration.
16243 72.1% successes 0 0.0% skipped 4012 17.8% failures 2259 10.0% errors
1 parent 5200de2 commit f78d5d7

File tree

6 files changed

+233
-270
lines changed

6 files changed

+233
-270
lines changed

lib/src/xpath/expressions/path.dart

Lines changed: 83 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@ import 'node.dart';
1313
import 'step.dart';
1414

1515
class PathExpression implements XPathExpression {
16-
factory PathExpression(List<Step> steps, {required bool isAbsolute}) {
16+
factory PathExpression(
17+
List<XPathExpression> steps, {
18+
required bool isAbsolute,
19+
}) {
1720
if (steps.isEmpty) {
1821
assert(isAbsolute);
1922
return const PathExpression._(
@@ -22,134 +25,41 @@ class PathExpression implements XPathExpression {
2225
isOrderPreserved: true,
2326
);
2427
}
25-
final optimizedSteps = <Step>[steps.first];
26-
for (final step in steps.skip(1)) {
27-
Step? merged;
28-
final last = optimizedSteps.last;
29-
if (last.axis is DescendantOrSelfAxis &&
30-
last.nodeTest is NodeTypeTest &&
31-
last.predicates.isEmpty &&
32-
step.predicates.isEmpty) {
33-
// Try to merge the '//' step with the next step:
34-
// The next step must not have any predicates,
35-
// because the predicates might depend on the `position` of the context node
36-
// and we cannot guarantee the `position` is still preserved after merging.
37-
switch (step.axis) {
38-
// child::x => descendant::x
28+
final optimizedSteps = <XPathExpression>[steps.first];
29+
for (var i = 1; i < steps.length; i++) {
30+
final previous = optimizedSteps.last;
31+
final current = steps[i];
32+
if (previous is StepExpression &&
33+
previous.predicates.isEmpty &&
34+
previous.axis is DescendantOrSelfAxis &&
35+
previous.nodeTest is NodeTypeTest &&
36+
current is StepExpression &&
37+
current.predicates.isEmpty) {
38+
switch (current.axis) {
3939
case ChildAxis():
40-
merged = Step(
40+
optimizedSteps.last = StepExpression(
4141
const DescendantAxis(),
42-
nodeTest: step.nodeTest,
43-
predicates: step.predicates,
42+
nodeTest: current.nodeTest,
4443
);
45-
// self::x => descendant-or-self::x
4644
case SelfAxis():
47-
merged = Step(
45+
optimizedSteps.last = StepExpression(
4846
const DescendantOrSelfAxis(),
49-
nodeTest: step.nodeTest,
50-
predicates: step.predicates,
47+
nodeTest: current.nodeTest,
5148
);
52-
// descendant::x => descendant::x
5349
case DescendantAxis():
54-
// descendant-or-self::x => descendant-or-self::x
5550
case DescendantOrSelfAxis():
56-
merged = step;
51+
optimizedSteps.last = current;
5752
default:
53+
optimizedSteps.add(current);
5854
}
59-
}
60-
if (merged != null) {
61-
optimizedSteps.removeLast();
62-
optimizedSteps.add(merged);
6355
} else {
64-
optimizedSteps.add(step);
65-
}
66-
}
67-
68-
// The resulting nodes' document order and uniqueness is preserved,
69-
// if the steps match the following patterns:
70-
//
71-
// 1. anyStep (selfStep | attributeStep)*
72-
// 2. (selfStep | childStep)+ (descendantStep | descendantOrSelfStep)? (selfStep | attributeStep)*
73-
//
74-
// Proof:
75-
//
76-
// Suppose we have node-set S_prev which is the result of the previous step and it's in document order.
77-
// And we are going to apply step f to it, where f is at least the second step obviously, then we get node-set S_next = f(S_prev).
78-
//
79-
// For any node pair i, j in S_next, where i comes before j in S_next, we need to prove that i comes before j in document order (1).
80-
//
81-
// Let's denote any nodes in S_prev that generate i and j as i' and j' respectively.
82-
// If i' = j', then i comes before j in document order because step f is a forward step, so (1) is true.
83-
// So we only need to discuss the case where **i' != j'**.
84-
//
85-
// To prove (1), we need to prove that:
86-
// - i' does not come after j' in S_prev (2)
87-
// - all nodes in f(i') come before all nodes in f(j') in document order (3).
88-
//
89-
// If i' comes after j' and we still get i before j in S_next, that means both f(i') and f(j') will produce i (4).
90-
// To prove (2), we just need to prove that (4) is impossible.
91-
//
92-
// Let's discuss each pattern:
93-
//
94-
// **Pattern 1**
95-
// Step f can only be selfStep or attributeStep.
96-
// For such step f, f(i') and f(j') cannot produce the same node i, so (4) is impossible and (2) is true.
97-
// Now that i' comes before j' in S_prev, we have:
98-
// - If f is selfStep, then i = i' and j = j', so (3) is true.
99-
// - If f is attributeStep, then all attributes of i' must come before all attributes of j' in document order, so (3) is true.
100-
//
101-
// **Pattern 2**
102-
// We only need to discuss a simpler form of pattern 2:
103-
// - (selfStep | childStep)+ (descendantStep | descendantOrSelfStep)?
104-
// Because if this simpler form produces nodes in document order, then these steps can be viewed as a single complex step.
105-
// Adding `(selfStep | attributeStep)*` after it is the same as pattern 1, which has been proved.
106-
// So step f can be selfStep, childStep, descendantStep, or descendantOrSelfStep.
107-
// For selfStep, the proof is the same as pattern 1.
108-
// And after all the steps before f, the resulting nodes have the same depth, which means j' cannot be a descendant of i' and vice versa (5).
109-
// For descendantOrSelfStep:
110-
// - Because of (5), f(i') and f(j') cannot produce the same node i, so (4) is impossible and (2) is true.
111-
// - Because of (2) and (5), all nodes in f(i') must come before all nodes in f(j') in document order, so (3) is true.
112-
// Same proof applies to descendantStep and childStep.
113-
//
114-
// So for all the patterns, (2) and (3) are true, so (1) is true.
115-
// By induction, the document order and uniqueness are preserved after applying all the steps.
116-
bool isPattern1() {
117-
for (var i = 1; i < optimizedSteps.length; i++) {
118-
final step = optimizedSteps[i];
119-
if (step.axis is! SelfAxis && step.axis is! AttributeAxis) {
120-
return false;
121-
}
122-
}
123-
return true;
124-
}
125-
126-
bool isPattern2() {
127-
var i = 0;
128-
for (i = 0; i < optimizedSteps.length; i++) {
129-
final axis = optimizedSteps[i].axis;
130-
if (axis is! SelfAxis && axis is! AttributeAxis && axis is! ChildAxis) {
131-
break;
132-
}
56+
optimizedSteps.add(current);
13357
}
134-
if (i < optimizedSteps.length) {
135-
final axis = optimizedSteps[i].axis;
136-
if (axis is DescendantAxis || axis is DescendantOrSelfAxis) {
137-
i++;
138-
}
139-
}
140-
for (; i < optimizedSteps.length; i++) {
141-
final axis = optimizedSteps[i].axis;
142-
if (axis is! SelfAxis && axis is! AttributeAxis) {
143-
return false;
144-
}
145-
}
146-
return true;
14758
}
148-
14959
return PathExpression._(
15060
optimizedSteps,
15161
isAbsolute: isAbsolute,
152-
isOrderPreserved: isPattern1() || isPattern2(),
62+
isOrderPreserved: _isOrderPreserved(optimizedSteps),
15363
);
15464
}
15565

@@ -159,10 +69,8 @@ class PathExpression implements XPathExpression {
15969
required this.isOrderPreserved,
16070
});
16171

162-
final List<Step> steps;
72+
final List<XPathExpression> steps;
16373
final bool isAbsolute;
164-
165-
/// Whether the document order of the resulting nodes is preserved after applying all the steps.
16674
final bool isOrderPreserved;
16775

16876
@override
@@ -200,31 +108,68 @@ class PathExpression implements XPathExpression {
200108
return XPathSequence(_sortAndDeduplicate(nodes, contextNode.root));
201109
}
202110
}
111+
}
203112

204-
static List<Object> _sortAndDeduplicate(Iterable<Object> iter, XmlNode root) {
205-
final nodes = <XmlNode>{};
206-
final others = <Object>{};
207-
for (final item in iter) {
208-
if (item is XmlNode) {
209-
nodes.add(item);
210-
} else {
211-
others.add(item);
212-
}
113+
bool _isOrderPreserved(List<XPathExpression> expressions) {
114+
if (expressions.length <= 1) {
115+
return true;
116+
}
117+
if (expressions.any((expression) => expression is! StepExpression)) {
118+
return false;
119+
}
120+
final steps = expressions.cast<StepExpression>().toList();
121+
if (steps
122+
.skip(1)
123+
.every((s) => s.axis is SelfAxis || s.axis is AttributeAxis)) {
124+
return true;
125+
}
126+
var i = 0;
127+
while (i < steps.length) {
128+
final axis = steps[i].axis;
129+
if (axis is SelfAxis || axis is AttributeAxis || axis is ChildAxis) {
130+
i++;
131+
} else {
132+
break;
213133
}
214-
final result = <Object>[];
215-
if (nodes.length <= 50) {
216-
result.addAll(nodes.sorted((a, b) => a.compareNodePosition(b)));
134+
}
135+
if (i < steps.length) {
136+
final axis = steps[i].axis;
137+
if (axis is DescendantAxis || axis is DescendantOrSelfAxis) i++;
138+
}
139+
while (i < steps.length) {
140+
final axis = steps[i].axis;
141+
if (axis is SelfAxis || axis is AttributeAxis) {
142+
i++;
217143
} else {
218-
if (nodes.remove(root)) result.add(root);
219-
for (final node in root.descendants) {
220-
if (nodes.isEmpty) break;
221-
if (nodes.remove(node)) result.add(node);
222-
}
223-
if (nodes.isNotEmpty) {
224-
result.addAll(nodes.sorted((a, b) => a.compareNodePosition(b)));
225-
}
144+
break;
145+
}
146+
}
147+
return i == steps.length;
148+
}
149+
150+
List<Object> _sortAndDeduplicate(Iterable<Object> iter, XmlNode root) {
151+
final nodes = <XmlNode>{};
152+
final others = <Object>{};
153+
for (final item in iter) {
154+
if (item is XmlNode) {
155+
nodes.add(item);
156+
} else {
157+
others.add(item);
158+
}
159+
}
160+
final result = <Object>[];
161+
if (nodes.length <= 50) {
162+
result.addAll(nodes.sorted((a, b) => a.compareNodePosition(b)));
163+
} else {
164+
if (nodes.remove(root)) result.add(root);
165+
for (final node in root.descendants) {
166+
if (nodes.isEmpty) break;
167+
if (nodes.remove(node)) result.add(node);
168+
}
169+
if (nodes.isNotEmpty) {
170+
result.addAll(nodes.sorted((a, b) => a.compareNodePosition(b)));
226171
}
227-
result.addAll(others);
228-
return result;
229172
}
173+
result.addAll(others);
174+
return result;
230175
}

lib/src/xpath/expressions/step.dart

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@ import 'package:meta/meta.dart';
33
import '../evaluation/context.dart';
44
import '../evaluation/expression.dart' show XPathExpression;
55
import '../types/node.dart';
6+
import '../types/sequence.dart';
67
import 'axis.dart';
78
import 'node.dart';
89
import 'predicate.dart';
910

11+
/// A step in a path expression.
12+
///
13+
/// Returns the resulting nodes in document order.
1014
@immutable
11-
class Step {
12-
const Step(
15+
class StepExpression implements XPathExpression {
16+
const StepExpression(
1317
this.axis, {
1418
this.nodeTest = const NodeTypeTest(),
1519
this.predicates = const [],
@@ -19,8 +23,8 @@ class Step {
1923
final NodeTest nodeTest;
2024
final List<Predicate> predicates;
2125

22-
/// Apply this step to the given context, returning the resulting nodes in document order.
23-
Iterable<Object> call(XPathContext context) {
26+
@override
27+
XPathSequence call(XPathContext context) {
2428
var result = <Object>[];
2529
for (final node in axis.find(xsNode.cast(context.item))) {
2630
if (nodeTest.matches(node)) {
@@ -43,24 +47,6 @@ class Step {
4347
result = matched;
4448
}
4549
}
46-
return result;
50+
return XPathSequence(result);
4751
}
4852
}
49-
50-
class ExpressionStep implements Step {
51-
const ExpressionStep(this.expression);
52-
53-
final XPathExpression expression;
54-
55-
@override
56-
Axis get axis => const SelfAxis();
57-
58-
@override
59-
NodeTest get nodeTest => const NodeTypeTest();
60-
61-
@override
62-
List<Predicate> get predicates => const [];
63-
64-
@override
65-
Iterable<Object> call(XPathContext context) => expression(context);
66-
}

0 commit comments

Comments
 (0)