Skip to content

Commit de646ae

Browse files
committed
Adopt the Python flavor of regular expressions from TRegex
1 parent 1b29146 commit de646ae

File tree

4 files changed

+231
-276
lines changed

4 files changed

+231
-276
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SREModuleBuiltins.java

Lines changed: 38 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@
4242

4343
import static com.oracle.graal.python.runtime.exception.PythonErrorType.RuntimeError;
4444
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
45+
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError;
4546

4647
import java.io.UnsupportedEncodingException;
4748
import java.util.List;
48-
import java.util.regex.Matcher;
4949
import java.util.regex.Pattern;
5050

5151
import com.oracle.graal.python.builtins.Builtin;
@@ -78,6 +78,7 @@
7878
import com.oracle.truffle.api.interop.UnsupportedTypeException;
7979
import com.oracle.truffle.api.nodes.Node;
8080
import com.oracle.truffle.api.profiles.BranchProfile;
81+
import com.oracle.truffle.regex.RegexSyntaxException;
8182

8283
@CoreFunctions(defineModule = "_sre")
8384
public class SREModuleBuiltins extends PythonBuiltins {
@@ -86,106 +87,6 @@ protected List<? extends NodeFactory<? extends PythonBuiltinBaseNode>> getNodeFa
8687
return SREModuleBuiltinsFactory.getFactories();
8788
}
8889

89-
@Builtin(name = "tregex_preprocess_for_verbose", fixedNumOfPositionalArgs = 1)
90-
@GenerateNodeFactory
91-
abstract static class TRegexPreprocessVerboseNode extends PythonUnaryBuiltinNode {
92-
93-
@Specialization
94-
Object run(PString str) {
95-
return run(str.getValue());
96-
}
97-
98-
@Specialization
99-
Object run(String str) {
100-
return replaceAll(str);
101-
}
102-
103-
/**
104-
* Removes comments and whitespace that are not inside a character class.
105-
*/
106-
@TruffleBoundary(transferToInterpreterOnException = false, allowInlining = true)
107-
private static String replaceAll(String r) {
108-
StringBuffer sb = new StringBuffer(r);
109-
int charclassNestingLevel = 0;
110-
boolean inComment = false;
111-
for (int i = 0; i < sb.length();) {
112-
char c = sb.charAt(i);
113-
if (c == '[' && !inComment) {
114-
charclassNestingLevel++;
115-
} else if (c == ']' && !inComment) {
116-
charclassNestingLevel--;
117-
} else if (c == '#' && charclassNestingLevel == 0) {
118-
inComment = true;
119-
} else if (c == '\n' && inComment) {
120-
inComment = false;
121-
}
122-
if (inComment || (Character.isWhitespace(c) && charclassNestingLevel == 0)) {
123-
sb.deleteCharAt(i);
124-
} else {
125-
i++;
126-
}
127-
}
128-
129-
for (int idx = sb.indexOf("\\Z"); idx != -1; idx = sb.indexOf("\\Z", idx + 2)) {
130-
sb.replace(idx, idx + 2, "$");
131-
}
132-
133-
return sb.toString();
134-
}
135-
136-
@Fallback
137-
Object run(Object o) {
138-
throw raise(PythonErrorType.TypeError, "expected string, not %p", o);
139-
}
140-
141-
}
142-
143-
@Builtin(name = "tregex_preprocess_default", fixedNumOfPositionalArgs = 1)
144-
@GenerateNodeFactory
145-
abstract static class TRegexPreprocessDefaultNode extends PythonUnaryBuiltinNode {
146-
@CompilationFinal private Pattern namedCaptGroupPattern;
147-
148-
@Specialization
149-
Object run(PString str) {
150-
return run(str.getValue());
151-
}
152-
153-
@Specialization
154-
Object run(String str) {
155-
if (namedCaptGroupPattern == null) {
156-
CompilerDirectives.transferToInterpreterAndInvalidate();
157-
namedCaptGroupPattern = Pattern.compile("\\?P\\<(?<GRPNAME>\\w*)\\>");
158-
}
159-
return replaceAll(str);
160-
}
161-
162-
/**
163-
* replaces named capturing groups {@code ?P<name>} by {@code ?<name>} and replaces
164-
* end-of-string {@code \Z} by {@code $}.
165-
*/
166-
@TruffleBoundary(transferToInterpreterOnException = false, allowInlining = true)
167-
private String replaceAll(String r) {
168-
Matcher matcher0 = namedCaptGroupPattern.matcher(r);
169-
StringBuffer sb = new StringBuffer();
170-
while (matcher0.find()) {
171-
matcher0.appendReplacement(sb, "?<" + matcher0.group("GRPNAME") + ">");
172-
}
173-
matcher0.appendTail(sb);
174-
175-
for (int idx = sb.indexOf("\\Z"); idx != -1; idx = sb.indexOf("\\Z", idx + 2)) {
176-
sb.replace(idx, idx + 2, "$");
177-
}
178-
179-
return sb.toString();
180-
}
181-
182-
@Fallback
183-
Object run(Object o) {
184-
throw raise(PythonErrorType.TypeError, "expected string, not %p", o);
185-
}
186-
187-
}
188-
18990
/**
19091
* Replaces any <i>quoted</i> escape sequence like {@code "\\n"} (two characters; backslash +
19192
* 'n') by its single character like {@code "\n"} (one character; newline).
@@ -257,45 +158,63 @@ private SequenceStorageNodes.ToByteArrayNode getToByteArrayNode() {
257158

258159
}
259160

260-
@Builtin(name = "tregex_call_safe", fixedNumOfPositionalArgs = 3)
161+
@Builtin(name = "tregex_call_compile", fixedNumOfPositionalArgs = 3)
261162
@TypeSystemReference(PythonArithmeticTypes.class)
262163
@GenerateNodeFactory
263-
abstract static class TRegexCallSafe extends PythonBuiltinNode {
164+
abstract static class TRegexCallCompile extends PythonBuiltinNode {
264165

265-
private Object doIt(TruffleObject callable, String arg1, Object arg2,
266-
BranchProfile runtimeError,
267-
BranchProfile typeError, Node invokeNode) {
166+
@Specialization(guards = "isForeignObject(callable)")
167+
Object call(TruffleObject callable, Object arg1, Object arg2,
168+
@Cached("create()") BranchProfile syntaxError,
169+
@Cached("create()") BranchProfile typeError,
170+
@Cached("createExecute()") Node invokeNode) {
268171
try {
269172
return ForeignAccess.sendExecute(invokeNode, callable, new Object[]{arg1, arg2});
270173
} catch (ArityException | UnsupportedTypeException | UnsupportedMessageException e) {
271174
typeError.enter();
272175
throw raise(TypeError, "%s", e);
273-
} catch (RuntimeException e) {
274-
runtimeError.enter();
275-
throw raise(RuntimeError, "%s", e);
176+
} catch (RegexSyntaxException e) {
177+
syntaxError.enter();
178+
if (e.getPosition() == -1) {
179+
throw raise(ValueError, "%s", e.getReason());
180+
} else {
181+
throw raise(ValueError, "%s at position %d", e.getReason(), e.getPosition());
182+
}
276183
}
277184
}
278185

279-
@Specialization(guards = "isForeignObject(callable)")
280-
Object call(TruffleObject callable, String arg1, String arg2,
281-
@Cached("create()") BranchProfile runtimeError,
282-
@Cached("create()") BranchProfile typeError,
283-
@Cached("createExecute()") Node invokeNode) {
284-
return doIt(callable, arg1, arg2, runtimeError, typeError, invokeNode);
186+
@SuppressWarnings("unused")
187+
@Fallback
188+
Object call(Object callable, Object arg1, Object arg2) {
189+
throw raise(RuntimeError, "invalid arguments passed to tregex_call_compile");
190+
}
191+
192+
protected static Node createExecute() {
193+
return Message.EXECUTE.createNode();
285194
}
195+
}
196+
197+
@Builtin(name = "tregex_call_exec", fixedNumOfPositionalArgs = 3)
198+
@TypeSystemReference(PythonArithmeticTypes.class)
199+
@GenerateNodeFactory
200+
abstract static class TRegexCallExec extends PythonBuiltinNode {
286201

287202
@Specialization(guards = "isForeignObject(callable)")
288-
Object call(TruffleObject callable, String arg1, int arg2,
289-
@Cached("create()") BranchProfile runtimeError,
203+
Object call(TruffleObject callable, Object arg1, Number arg2,
290204
@Cached("create()") BranchProfile typeError,
291205
@Cached("createExecute()") Node invokeNode) {
292-
return doIt(callable, arg1, arg2, runtimeError, typeError, invokeNode);
206+
try {
207+
return ForeignAccess.sendExecute(invokeNode, callable, new Object[]{arg1, arg2});
208+
} catch (ArityException | UnsupportedTypeException | UnsupportedMessageException e) {
209+
typeError.enter();
210+
throw raise(TypeError, "%s", e);
211+
}
293212
}
294213

295214
@SuppressWarnings("unused")
296215
@Fallback
297216
Object call(Object callable, Object arg1, Object arg2) {
298-
throw raise(RuntimeError);
217+
throw raise(RuntimeError, "invalid arguments passed to tregex_call_exec");
299218
}
300219

301220
protected static Node createExecute() {

0 commit comments

Comments
 (0)