Skip to content

Commit 28e48a5

Browse files
committed
[GR-39984] Performance issues with regex and nltk modules.
PullRequest: graalpython/2354
2 parents 49bfd6f + c671d09 commit 28e48a5

File tree

4 files changed

+23
-3
lines changed

4 files changed

+23
-3
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ int getBufferLength(
127127
}
128128

129129
@ExportMessage
130-
@TruffleBoundary
131130
byte readByte(int byteOffset,
132131
@Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) {
133132
int ch = codePointAtIndexNode.execute(str, byteOffset, TS_ENCODING);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/EconomicMapStorage.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ static final class DictKey {
9797
public int hashCode() {
9898
return (int) hash;
9999
}
100+
101+
@Override
102+
public String toString() {
103+
CompilerAsserts.neverPartOfCompilation();
104+
return value + "@" + hash;
105+
}
100106
}
101107

102108
private final PEMap map;

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/call/special/CallBinaryMethodNode.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -201,6 +201,17 @@ static Object callSelfMethod(VirtualFrame frame, @SuppressWarnings("unused") PBu
201201
return builtinNode.execute(frame, func.getSelf(), arg1, arg2);
202202
}
203203

204+
@Specialization(guards = {"builtinNode != null", "getCallTarget(func, getCt) == ct", "!takesSelfArg",
205+
"frame != null || unusedFrame"}, limit = "getCallSiteInlineCacheMaxDepth()")
206+
static Object callMethod(VirtualFrame frame, @SuppressWarnings("unused") PBuiltinMethod func, Object arg1, Object arg2,
207+
@SuppressWarnings("unused") @Cached GetCallTargetNode getCt,
208+
@SuppressWarnings("unused") @Cached("getCallTarget(func, getCt)") RootCallTarget ct,
209+
@SuppressWarnings("unused") @Cached("takesSelfArg(func)") boolean takesSelfArg,
210+
@Cached("getTernary(frame, func.getFunction())") PythonTernaryBuiltinNode builtinNode,
211+
@SuppressWarnings("unused") @Cached("frameIsUnused(builtinNode)") boolean unusedFrame) {
212+
return builtinNode.execute(frame, arg1, arg2, PNone.NO_VALUE);
213+
}
214+
204215
/**
205216
* In case the function takes fewer than or equal to 2 arguments (so it is <i>at least</i> binary)
206217
* we also try to call a ternary function.

graalpython/lib-graalpython/_sre.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def setup(sre_compiler, error_class, flags_table):
170170
(FLAG_DOTALL, "DOTALL"),
171171
(FLAG_UNICODE, "UNICODE"),
172172
(FLAG_VERBOSE, "VERBOSE"),
173-
(FLAG_DEBUG, "DEBUG"),
173+
# (FLAG_DEBUG, "DEBUG"), # there is no DEBUG flag in tregex
174174
(FLAG_ASCII, "ASCII"),
175175
]
176176

@@ -311,6 +311,7 @@ def __init__(self, pattern, flags):
311311
flags_str.append(char)
312312
self.__flags_str = "".join(flags_str)
313313
self.__compiled_regexes = {}
314+
self.__cached_flags = None
314315
compiled_regex = self.__tregex_compile()
315316
self.groups = compiled_regex.groupCount - 1
316317
groups = compiled_regex.groups
@@ -331,6 +332,8 @@ def __init__(self, pattern, flags):
331332
@property
332333
def flags(self):
333334
# Flags can be specified both in the flag argument and inline in the regex. Extract them back from the regex.
335+
if self.__cached_flags != None:
336+
return self.__cached_flags
334337
flags = self.__input_flags
335338
regex_flags = self.__tregex_compile().flags
336339
for flag, name in FLAG_NAMES:
@@ -339,6 +342,7 @@ def flags(self):
339342
flags |= flag
340343
except AttributeError:
341344
pass
345+
self.__cached_flags = flags
342346
return flags
343347

344348
def __check_input_type(self, input):

0 commit comments

Comments
 (0)