Skip to content

Commit e945a20

Browse files
committed
[GR-52848] Fix numpy benchmark failures
PullRequest: graalpython/3266
2 parents d5db163 + 15bff64 commit e945a20

File tree

19 files changed

+180
-257
lines changed

19 files changed

+180
-257
lines changed

graalpython/com.oracle.graal.python.cext/include/cpython/listobject.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@ static inline Py_ssize_t PyList_GET_SIZE(PyObject *op) {
4545

4646
#define PyList_GET_ITEM(op, index) (PyList_GetItem((PyObject*)(op), (index)))
4747

48-
PyAPI_FUNC(void) PyTruffleList_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value);
48+
// GraalPy-specific
49+
PyAPI_FUNC(PyObject **) PyTruffleList_GetItems(PyObject *op);
50+
4951
static inline void
5052
PyList_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value) {
51-
PyTruffleList_SET_ITEM(op, index, value);
53+
PyTruffleList_GetItems(op)[index] = value;
5254
}
5355
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
5456
#define PyList_SET_ITEM(op, index, value) \

graalpython/com.oracle.graal.python.cext/include/cpython/tupleobject.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,13 @@ static inline Py_ssize_t PyTuple_GET_SIZE(PyObject *op) {
3636
PyAPI_FUNC(PyObject *) _PyTuple_GET_ITEM(PyObject *, Py_ssize_t);
3737
#define PyTuple_GET_ITEM(op, i) _PyTuple_GET_ITEM(_PyObject_CAST(op), (i))
3838

39+
// GraalPy-specific
40+
PyAPI_FUNC(PyObject **) PyTruffleTuple_GetItems(PyObject *op);
41+
3942
/* Function *only* to be used to fill in brand new tuples */
40-
PyAPI_FUNC(void) PyTruffleTuple_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value);
4143
static inline void
4244
PyTuple_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value) {
43-
PyTruffleTuple_SET_ITEM(op, index, value);
45+
PyTruffleTuple_GetItems(op)[index] = value;
4446
}
4547
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
4648
#define PyTuple_SET_ITEM(op, index, value) \

graalpython/com.oracle.graal.python.cext/src/abstract.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3061,7 +3061,11 @@ PyNumber_InPlaceOr(PyObject *v, PyObject *w)
30613061
PyObject **
30623062
PyTruffleSequence_Fast_ITEMS(PyObject *o)
30633063
{
3064-
return PyList_Check(o) ? PyListObject_ob_item(o) : PyTupleObject_ob_item(o);
3064+
if (PyTuple_Check(o)) {
3065+
return PyTruffleTuple_GetItems(o);
3066+
} else {
3067+
return PyTruffleList_GetItems(o);
3068+
}
30653069
}
30663070

30673071
PyObject*

graalpython/com.oracle.graal.python.cext/src/bytesobject.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ PyBytes_FromString(const char *str)
153153
PyObject *
154154
PyBytes_FromFormatV(const char *format, va_list vargs)
155155
{
156+
// GraalPy change: different implementation
156157
/* Unfortunately, we need to know the expected types of the arguments before we can do an upcall. */
157158
char *s;
158159
const char *f = format;
@@ -291,7 +292,7 @@ PyBytes_FromFormatV(const char *format, va_list vargs)
291292
entry = PyLong_FromVoidPtr(va_arg(vargs, void*));
292293
break;
293294
}
294-
GraalPyTuple_SetItem(args, i, entry);
295+
PyTuple_SET_ITEM(args, i, entry);
295296
}
296297
PyObject* result = GraalPyTruffleBytes_FromFormat(format, args);
297298
Py_DecRef(args);

graalpython/com.oracle.graal.python.cext/src/listobject.c

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,47 @@
4242

4343
// alias for internal function, currently used in PyO3
4444
void _PyList_SET_ITEM(PyObject* a, Py_ssize_t b, PyObject* c) {
45-
return PyTruffleList_SET_ITEM(a, b, c);
45+
return PyList_SET_ITEM(a, b, c);
46+
}
47+
48+
static inline int
49+
valid_index(Py_ssize_t i, Py_ssize_t limit)
50+
{
51+
/* The cast to size_t lets us use just a single comparison
52+
to check whether i is in the range: 0 <= i < limit.
53+
54+
See: Section 14.2 "Bounds Checking" in the Agner Fog
55+
optimization manual found at:
56+
https://www.agner.org/optimize/optimizing_cpp.pdf
57+
*/
58+
return (size_t) i < (size_t) limit;
59+
}
60+
61+
int
62+
PyList_SetItem(PyObject *op, Py_ssize_t i,
63+
PyObject *newitem)
64+
{
65+
PyObject **p;
66+
if (!PyList_Check(op)) {
67+
Py_XDECREF(newitem);
68+
PyErr_BadInternalCall();
69+
return -1;
70+
}
71+
if (!valid_index(i, Py_SIZE(op))) {
72+
Py_XDECREF(newitem);
73+
PyErr_SetString(PyExc_IndexError,
74+
"list assignment index out of range");
75+
return -1;
76+
}
77+
// GraalPy change: avoid direct struct access
78+
p = PyTruffleList_GetItems(op) + i;
79+
Py_XSETREF(*p, newitem);
80+
return 0;
81+
}
82+
83+
// GraalPy-additions
84+
PyObject **
85+
PyTruffleList_GetItems(PyObject *op)
86+
{
87+
return PyListObject_ob_item(op);
4688
}

graalpython/com.oracle.graal.python.cext/src/tupleobject.c

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
#include "pycore_abstract.h" // _PyIndex_Check()
1313
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
1414
#include "pycore_initconfig.h" // _PyStatus_OK()
15-
#include "pycore_object.h" // _PyObject_GC_TRACK(), _Py_FatalRefcountError()
1615
#endif // GraalPy change
16+
#include "pycore_object.h" // _PyObject_GC_TRACK(), _Py_FatalRefcountError()
1717

1818
#if 0 // GraalPy change
1919
/*[clinic input]
@@ -136,12 +136,12 @@ PyTuple_GetItem(PyObject *op, Py_ssize_t i) {
136136
#endif /* GRAALVM_PYTHON_LLVM_MANAGED */
137137
}
138138

139-
#if 0 // GraalPy change
140139
int
141140
PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
142141
{
143142
PyObject **p;
144-
if (!PyTuple_Check(op) || Py_REFCNT(op) != 1) {
143+
// GraalPy change: remove refcount check
144+
if (!PyTuple_Check(op)) {
145145
Py_XDECREF(newitem);
146146
PyErr_BadInternalCall();
147147
return -1;
@@ -152,11 +152,13 @@ PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
152152
"tuple assignment index out of range");
153153
return -1;
154154
}
155-
p = ((PyTupleObject *)op) -> ob_item + i;
155+
// GraalPy change: avoid direct struct access
156+
p = PyTruffleTuple_GetItems(op) + i;
156157
Py_XSETREF(*p, newitem);
157158
return 0;
158159
}
159160

161+
#if 0 // GraalPy change
160162
void
161163
_PyTuple_MaybeUntrack(PyObject *op)
162164
{
@@ -1301,22 +1303,47 @@ _PyTuple_DebugMallocStats(FILE *out)
13011303
#endif // GraalPy change
13021304

13031305
// GraalPy additions
1304-
PyObject* PyTruffle_Tuple_Alloc(PyTypeObject* cls, Py_ssize_t nitems) {
1306+
PyObject* PyTruffle_Tuple_Alloc(PyTypeObject* type, Py_ssize_t nitems) {
13051307
/*
13061308
* TODO(fa): For 'PyVarObjects' (i.e. 'nitems > 0') we increase the size by 'sizeof(void *)'
13071309
* because this additional pointer can then be used as pointer to the element array.
13081310
* CPython usually embeds the array in the struct but Sulong doesn't currently support that.
13091311
* So we allocate space for the additional array pointer.
13101312
* Also consider any 'PyVarObject' (in particular 'PyTupleObject') if this is fixed.
1313+
*
1314+
* This function is mostly an inlined copy-paste of PyType_GenericAlloc, with different size
1315+
* and added initialization of ob_item
13111316
*/
1312-
Py_ssize_t size = cls->tp_basicsize + cls->tp_itemsize * nitems + sizeof(PyObject **);
1313-
PyObject* newObj = (PyObject*)PyObject_Malloc(size);
1314-
if(cls->tp_dictoffset) {
1315-
*((PyObject **) ((char *)newObj + cls->tp_dictoffset)) = NULL;
1316-
}
1317-
PyObject_INIT_VAR(newObj, cls, nitems);
1318-
((PyTupleObject*)newObj)->ob_item = (PyObject **) ((char *)newObj + offsetof(PyTupleObject, ob_item) + sizeof(PyObject **));
1319-
return newObj;
1317+
PyObject *obj;
1318+
const size_t size = _PyObject_VAR_SIZE(type, nitems+1) + sizeof(PyObject **);
1319+
/* note that we need to add one, for the sentinel */
1320+
1321+
const size_t presize = _PyType_PreHeaderSize(type);
1322+
char *alloc = PyObject_Malloc(size + presize);
1323+
if (alloc == NULL) {
1324+
return PyErr_NoMemory();
1325+
}
1326+
obj = (PyObject *)(alloc + presize);
1327+
if (presize) {
1328+
// GraalPy change: different header layout, no GC link
1329+
((PyObject **)alloc)[0] = NULL;
1330+
}
1331+
memset(obj, '\0', size);
1332+
1333+
if (type->tp_itemsize == 0) {
1334+
_PyObject_Init(obj, type);
1335+
}
1336+
else {
1337+
_PyObject_InitVar((PyVarObject *)obj, type, nitems);
1338+
}
1339+
1340+
if (_PyType_IS_GC(type)) {
1341+
_PyObject_GC_TRACK(obj);
1342+
}
1343+
1344+
((PyTupleObject*)obj)->ob_item = (PyObject **) ((char *)obj + offsetof(PyTupleObject, ob_item) + sizeof(PyObject **));
1345+
1346+
return obj;
13201347
}
13211348

13221349
void PyTruffle_Tuple_Dealloc(PyTupleObject* self) {
@@ -1330,30 +1357,40 @@ void PyTruffle_Tuple_Dealloc(PyTupleObject* self) {
13301357
Py_TYPE(self)->tp_free((PyObject *)self);
13311358
}
13321359

1333-
/*
1334-
* Unsafe variant of PyTuple_GetItem for implementing access macro
1335-
* PyTuple_GET_ITEM.
1336-
*/
1337-
PyObject*
1338-
_PyTuple_GET_ITEM(PyObject* a, Py_ssize_t b) {
1360+
PyObject **
1361+
PyTruffleTuple_GetItems(PyObject *op)
1362+
{
13391363
#ifdef GRAALVM_PYTHON_LLVM_MANAGED
1340-
return GraalPyTruffleTuple_GetItem(a, b);
1364+
return GraalPy_get_PyTupleObject_ob_item(op);
13411365
#else /* GRAALVM_PYTHON_LLVM_MANAGED */
13421366
PyObject *res;
13431367
PyObject **ob_item;
1344-
if (points_to_py_handle_space(a)) {
1345-
GraalPyVarObject *ptr = (GraalPyVarObject *) pointer_to_stub(a);
1368+
if (points_to_py_handle_space(op)) {
1369+
GraalPyVarObject *ptr = (GraalPyVarObject *) pointer_to_stub(op);
13461370
ob_item = ((GraalPyVarObject *) ptr)->ob_item;
13471371
/* The UNLIKELY is maybe not true but the branch is costly anyway. So,
13481372
if we can optimize for something, it should be the path without the
13491373
upcall. */
13501374
if (UNLIKELY(ob_item == NULL)) {
1351-
ptr->ob_item = (ob_item = GraalPy_get_PyTupleObject_ob_item((PyTupleObject *)a));
1375+
ptr->ob_item = (ob_item = GraalPy_get_PyTupleObject_ob_item((PyTupleObject *)op));
13521376
}
13531377
} else {
1354-
ob_item = ((PyTupleObject *) a)->ob_item;
1378+
ob_item = ((PyTupleObject *) op)->ob_item;
13551379
}
13561380
assert(ob_item != NULL);
1357-
return ob_item[b];
1381+
return ob_item;
1382+
#endif /* GRAALVM_PYTHON_LLVM_MANAGED */
1383+
}
1384+
1385+
/*
1386+
* Unsafe variant of PyTuple_GetItem for implementing access macro
1387+
* PyTuple_GET_ITEM.
1388+
*/
1389+
PyObject*
1390+
_PyTuple_GET_ITEM(PyObject* a, Py_ssize_t b) {
1391+
#ifdef GRAALVM_PYTHON_LLVM_MANAGED
1392+
return GraalPyTruffleTuple_GetItem(a, b);
1393+
#else /* GRAALVM_PYTHON_LLVM_MANAGED */
1394+
return PyTruffleTuple_GetItems(a)[b];
13581395
#endif /* GRAALVM_PYTHON_LLVM_MANAGED */
13591396
}

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_method.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,10 @@ def test_methods(self):
6565
PyObject *argTuple = PyTuple_New(nargs);
6666
if (!argTuple)
6767
return NULL;
68-
for (int i = 0; i < nargs; i++)
68+
for (int i = 0; i < nargs; i++) {
69+
Py_INCREF(args[i]);
6970
PyTuple_SET_ITEM(argTuple, i, args[i]);
71+
}
7072
return argTuple;
7173
}
7274
static PyObject* keywordsToDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) {

graalpython/com.oracle.graal.python.test/src/tests/cpyext/test_tuple.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ class TestPyTuple(CPyExtTestCase):
166166
Py_DECREF(tuple);
167167
return NULL;
168168
}
169+
Py_INCREF(item);
169170
PyTuple_SET_ITEM(tuple, i, item);
170171
}
171172
Py_INCREF(value);

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinConstructors.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@
127127
import com.oracle.graal.python.builtins.objects.cext.capi.CExtNodes.PCallCapiFunction;
128128
import com.oracle.graal.python.builtins.objects.cext.capi.ExternalFunctionNodes;
129129
import com.oracle.graal.python.builtins.objects.cext.capi.NativeCAPISymbol;
130-
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.NativeToPythonNode;
130+
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.NativeToPythonTransferNode;
131131
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeNode;
132132
import com.oracle.graal.python.builtins.objects.cext.common.CArrayWrappers.CByteArrayWrapper;
133133
import com.oracle.graal.python.builtins.objects.code.CodeNodes;
@@ -360,7 +360,7 @@ static PBytes doManaged(@SuppressWarnings("unused") Node inliningTarget, Object
360360
static Object doNative(@SuppressWarnings("unused") Node inliningTarget, Object cls, byte[] bytes,
361361
@SuppressWarnings("unused") @Shared @Cached TypeNodes.NeedsNativeAllocationNode needsNativeAllocationNode,
362362
@Cached(inline = false) PythonToNativeNode toNative,
363-
@Cached(inline = false) NativeToPythonNode toPython,
363+
@Cached(inline = false) NativeToPythonTransferNode toPython,
364364
@Cached(inline = false) PCallCapiFunction call) {
365365
CByteArrayWrapper wrapper = new CByteArrayWrapper(bytes);
366366
try {
@@ -415,7 +415,7 @@ static PComplex doManaged(@SuppressWarnings("unused") Node inliningTarget, Objec
415415
static Object doNative(Node inliningTarget, Object cls, double real, double imaginary,
416416
@Cached(inline = false) PCallCapiFunction callCapiFunction,
417417
@Cached(inline = false) PythonToNativeNode toNativeNode,
418-
@Cached(inline = false) NativeToPythonNode toPythonNode,
418+
@Cached(inline = false) NativeToPythonTransferNode toPythonNode,
419419
@Cached(inline = false) ExternalFunctionNodes.DefaultCheckFunctionResultNode checkFunctionResultNode) {
420420
NativeCAPISymbol symbol = NativeCAPISymbol.FUN_COMPLEX_SUBTYPE_FROM_DOUBLES;
421421
Object nativeResult = callCapiFunction.call(symbol, toNativeNode.execute(cls), real, imaginary);
@@ -1962,7 +1962,7 @@ protected abstract static class CallNativeGenericNewNode extends Node {
19621962
@Specialization
19631963
static Object call(Object cls,
19641964
@Cached(inline = false) PythonToNativeNode toNativeNode,
1965-
@Cached(inline = false) NativeToPythonNode toPythonNode,
1965+
@Cached(inline = false) NativeToPythonTransferNode toPythonNode,
19661966
@Cached(inline = false) PCallCapiFunction callCapiFunction) {
19671967
return toPythonNode.execute(callCapiFunction.call(FUN_PY_OBJECT_NEW, toNativeNode.execute(cls)));
19681968
}
@@ -2996,7 +2996,7 @@ static Object doNativeSubtype(Object cls, Object[] args, @SuppressWarnings("unus
29962996
@Shared @Cached PythonObjectFactory factory,
29972997
@Cached PCallCapiFunction callCapiFunction,
29982998
@Cached PythonToNativeNode toNativeNode,
2999-
@Cached NativeToPythonNode toPythonNode,
2999+
@Cached NativeToPythonTransferNode toPythonNode,
30003000
@Cached ExternalFunctionNodes.DefaultCheckFunctionResultNode checkFunctionResultNode) {
30013001
Object argsTuple = args.length > 0 ? factory.createTuple(args) : factory.createEmptyTuple();
30023002
Object nativeResult = callCapiFunction.call(NativeCAPISymbol.FUN_EXCEPTION_SUBTYPE_NEW, toNativeNode.execute(cls), toNativeNode.execute(argsTuple));

0 commit comments

Comments
 (0)