Skip to content

Commit 8fbfee8

Browse files
zaclegarssuredjoooooe
authored and committed
TRegex: Fix look around merging with empty transition.
1 parent d7af185 commit 8fbfee8

File tree

5 files changed

+54
-8
lines changed

5 files changed

+54
-8
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,14 @@ public void gr56676() {
311311
test("(?<!a)", "digyus", "x\uDE40", 2, true, 2, 2);
312312
}
313313

314+
@Test
315+
public void emptyTransitionMergedWithLookAhead() {
316+
test("a(?=b(?<=ab)()|)", "", "ab", 0, true, 0, 1, 2, 2);
317+
test("a(?=b(?<=ab)()|)", "", "ac", 0, true, 0, 1, -1, -1);
318+
test("a(?=b(?<=ab)()|)", "", "a", 0, true, 0, 1, -1, -1);
319+
test("a?(?=b(?<=ab)()|)", "", "a", 0, true, 0, 1, -1, -1);
320+
}
321+
314322
@Test
315323
public void generatedTests() {
316324
/* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/NFAGenerator.java

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import com.oracle.truffle.regex.tregex.buffer.CompilationBuffer;
6060
import com.oracle.truffle.regex.tregex.parser.Counter;
6161
import com.oracle.truffle.regex.tregex.parser.ast.CharacterClass;
62+
import com.oracle.truffle.regex.tregex.parser.ast.GroupBoundaries;
6263
import com.oracle.truffle.regex.tregex.parser.ast.LookBehindAssertion;
6364
import com.oracle.truffle.regex.tregex.parser.ast.MatchFound;
6465
import com.oracle.truffle.regex.tregex.parser.ast.PositionAssertion;
@@ -76,6 +77,7 @@ public final class NFAGenerator {
7677
private final NFAState dummyInitialState;
7778
private final NFAState[] anchoredInitialStates;
7879
private final NFAState[] initialStates;
80+
private NFAState checkFinalTransitionState;
7981
/**
8082
* These are like {@link #initialStates}, but with {@code mustAdvance} set to {@code false},
8183
* i.e. we have already advanced when we are in these states. In a regular expression with
@@ -153,6 +155,14 @@ private NFAGenerator(RegexAST ast, CompilationBuffer compilationBuffer) {
153155
dummyInitialState.setPredecessors(dummyInitPrev);
154156
}
155157

158+
private NFAState getFinalCheckedTransitionState() {
159+
if (checkFinalTransitionState == null) {
160+
checkFinalTransitionState = createFinalState(StateSet.create(ast, ast.getRoot().getSubTreeParent().getMatchFoundChecked()), ast.getOptions().isMustAdvance());
161+
checkFinalTransitionState.setSuccessors(new NFAStateTransition[]{createNoCGTransition(checkFinalTransitionState, finalState, ast.getEncoding().getFullSet())}, true);
162+
}
163+
return checkFinalTransitionState;
164+
}
165+
156166
public static NFA createNFA(RegexAST ast, CompilationBuffer compilationBuffer) {
157167
return new NFAGenerator(ast, compilationBuffer).doCreateNFA();
158168
}
@@ -301,12 +311,23 @@ private NFAStateTransition[] createNFATransitions(NFAState sourceState, ASTStep
301311
}
302312
} else if (stateSetCC == null) {
303313
if (containsMatchFound) {
304-
transitionsBuffer.add(createTransition(sourceState, finalState, ast.getEncoding().getFullSet()));
305-
// Transitions dominated by a transition to a final state will never end
306-
// up being used, so we can skip generating them and return the current
307-
// list of transitions.
308-
clearGroupBoundaries();
309-
return transitionsBuffer.toArray(new NFAStateTransition[transitionsBuffer.size()]);
314+
if (mergeBuilder.getCodePointSet().matchesEverything(ast.getEncoding())) {
315+
transitionsBuffer.add(createTransition(sourceState, finalState, ast.getEncoding().getFullSet()));
316+
// Transitions dominated by a transition to a final state will never
317+
// end up being used, so we can skip generating them and return the
318+
// current list of transitions.
319+
clearGroupBoundaries();
320+
return transitionsBuffer.toArray(new NFAStateTransition[transitionsBuffer.size()]);
321+
}
322+
// This case is only reachable when merging a lookbehind with an empty
323+
// transition to the final state.
324+
// The issue is that the priority between transitions after running the
325+
// canonicalizer is lost, but that doesn't matter since
326+
// they have disjoint code point sets. But when the transition reaches
327+
// the final state, then its cps is not checked.
328+
// In that case we use this special checkFinalTransitionState, which
329+
// will still check that the last code point set matches.
330+
transitionsBuffer.add(createTransition(sourceState, getFinalCheckedTransitionState(), mergeBuilder.getCodePointSet()));
310331
}
311332
} else {
312333
if (containsMatchFound && allCCInLookBehind) {
@@ -343,6 +364,10 @@ private NFAStateTransition createTransition(NFAState source, NFAState target, Co
343364
return new NFAStateTransition((short) transitionID.inc(), source, target, codePointSet, ast.createGroupBoundaries(transitionGBUpdateIndices, transitionGBClearIndices, -1, lastGroup));
344365
}
345366

367+
private NFAStateTransition createNoCGTransition(NFAState source, NFAState target, CodePointSet codePointSet) {
368+
return new NFAStateTransition((short) transitionID.inc(), source, target, codePointSet, GroupBoundaries.getEmptyInstance(ast.getLanguage()));
369+
}
370+
346371
private NFAState registerMatcherState(StateSet<RegexAST, CharacterClass> stateSetCC,
347372
StateSet<RegexAST, LookBehindAssertion> finishedLookBehinds,
348373
boolean containsPrefixStates,

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexAST.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ public AtomicGroup createAtomicGroup() {
392392
}
393393

394394
public void createNFAHelperNodes(RegexASTSubtreeRootNode rootNode) {
395-
nodeCount.inc(4);
395+
nodeCount.inc(5);
396396
PositionAssertion anchored = new PositionAssertion(PositionAssertion.Type.CARET);
397397
rootNode.setAnchoredInitialState(anchored);
398398
MatchFound unAnchored = new MatchFound();
@@ -401,6 +401,8 @@ public void createNFAHelperNodes(RegexASTSubtreeRootNode rootNode) {
401401
rootNode.setMatchFound(end);
402402
PositionAssertion anchoredEnd = new PositionAssertion(PositionAssertion.Type.DOLLAR);
403403
rootNode.setAnchoredFinalState(anchoredEnd);
404+
MatchFound endChecked = new MatchFound();
405+
rootNode.setMatchFoundChecked(endChecked);
404406
}
405407

406408
public PositionAssertion createPositionAssertion(PositionAssertion.Type type) {

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexASTSubtreeRootNode.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public abstract class RegexASTSubtreeRootNode extends Term implements RegexASTVi
6161
private MatchFound unAnchoredInitialState;
6262
private PositionAssertion anchoredFinalState;
6363
private MatchFound matchFound;
64+
private MatchFound matchFoundChecked;
6465
private boolean visitorGroupVisited = false;
6566

6667
private final SubTreeIndex subtrees = new SubTreeIndex();
@@ -230,4 +231,13 @@ public String toString() {
230231
protected JsonObject toJson(String typeName) {
231232
return super.toJson(typeName).append(Json.prop("group", astNodeId(group)));
232233
}
234+
235+
public void setMatchFoundChecked(MatchFound matchFoundChecked) {
236+
this.matchFoundChecked = matchFoundChecked;
237+
}
238+
239+
public MatchFound getMatchFoundChecked() {
240+
assert matchFoundChecked != null;
241+
return matchFoundChecked;
242+
}
233243
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/visitors/InitIDVisitor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -121,6 +121,7 @@ protected void leave(Group group) {
121121
if (group.getParent() instanceof RegexASTSubtreeRootNode) {
122122
initID(group.getSubTreeParent().getAnchoredFinalState());
123123
initID(group.getSubTreeParent().getMatchFound());
124+
initID(group.getSubTreeParent().getMatchFoundChecked());
124125
}
125126
}
126127

0 commit comments

Comments
 (0)