Skip to content

Commit 4224281

Browse files
committed
[GR-48910] Add Arrow Vector support
PullRequest: graalpython/3607
2 parents bd7ad3c + 0f83fa8 commit 4224281

24 files changed

+1825
-4
lines changed

graalpython/com.oracle.graal.python.frozen/freeze_modules.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
def add_graalpython_core():
103103
lib_graalpython = os.path.join(os.path.dirname(__file__), "..", "lib-graalpython")
104104
l = []
105+
l.append("polyglot.arrow : polyglot.arrow = " + os.path.join(lib_graalpython, "modules/_polyglot_arrow.py"))
105106
for name in [
106107
"modules/_sysconfigdata",
107108
]:

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@
8383
import java.util.List;
8484
import java.util.logging.Level;
8585

86+
import com.oracle.graal.python.nodes.arrow.ArrowArray;
87+
import com.oracle.graal.python.nodes.arrow.ArrowSchema;
88+
import com.oracle.graal.python.nodes.arrow.capsule.CreateArrowPyCapsuleNode;
89+
import com.oracle.graal.python.nodes.arrow.vector.VectorToArrowArrayNode;
90+
import com.oracle.graal.python.nodes.arrow.vector.VectorToArrowSchemaNode;
8691
import org.graalvm.nativeimage.ImageInfo;
8792

8893
import com.oracle.graal.python.PythonLanguage;
@@ -1301,4 +1306,19 @@ Object invokeMember(String member, Object[] arguments) throws UnsupportedMessage
13011306
}
13021307
}
13031308
}
1309+
1310+
/**
 * Builtin {@code export_arrow_vector}: takes a single positional argument and returns the tuple
 * produced by {@link CreateArrowPyCapsuleNode} for the ArrowArray and ArrowSchema derived from it.
 */
@Builtin(name = "export_arrow_vector", minNumOfPositionalArgs = 1)
1311+
@GenerateNodeFactory
1312+
public abstract static class ExportArrowVector extends PythonUnaryBuiltinNode {
1313+
// Single specialization accepting any object; the argument is handed unchanged to both converter nodes.
// NOTE(review): how an unsupported argument is rejected is decided inside those nodes, not visible here.
@Specialization
1314+
static PTuple doExport(Object vector,
1315+
@Bind("this") Node inliningTarget,
1316+
@Cached VectorToArrowArrayNode exportArray,
1317+
@Cached VectorToArrowSchemaNode exportSchema,
1318+
@Cached CreateArrowPyCapsuleNode createArrowCapsuleNode) {
1319+
// Build the array description first, then the schema, both from the same input object.
ArrowArray arrowArray = exportArray.execute(inliningTarget, vector);
1320+
ArrowSchema arrowSchema = exportSchema.execute(inliningTarget, vector);
1321+
// Wrap both structures into the PTuple returned to the caller.
return createArrowCapsuleNode.execute(inliningTarget, arrowArray, arrowSchema);
1322+
}
1323+
}
13041324
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextCapsuleBuiltins.java

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@
7070
import com.oracle.truffle.api.dsl.Bind;
7171
import com.oracle.truffle.api.dsl.Cached;
7272
import com.oracle.truffle.api.dsl.Fallback;
73+
import com.oracle.truffle.api.dsl.GenerateCached;
74+
import com.oracle.truffle.api.dsl.GenerateInline;
75+
import com.oracle.truffle.api.dsl.GenerateUncached;
7376
import com.oracle.truffle.api.dsl.Specialization;
7477
import com.oracle.truffle.api.interop.InteropLibrary;
7578
import com.oracle.truffle.api.library.CachedLibrary;
@@ -83,6 +86,19 @@ abstract static class PyCapsule_New extends CApiTernaryBuiltinNode {
8386
@Specialization
8487
static Object doGeneric(Object pointer, Object namePtr, Object destructor,
8588
@Bind("this") Node inliningTarget,
89+
@Cached PyCapsuleNewNode pyCapsuleNewNode) {
90+
return pyCapsuleNewNode.execute(inliningTarget, pointer, namePtr, destructor);
91+
}
92+
}
93+
94+
@GenerateCached(false)
95+
@GenerateInline
96+
public abstract static class PyCapsuleNewNode extends Node {
97+
98+
public abstract PyCapsule execute(Node inliningTarget, Object pointer, Object name, Object destructor);
99+
100+
@Specialization
101+
static PyCapsule doGeneric(Node inliningTarget, Object pointer, Object namePtr, Object destructor,
86102
@CachedLibrary(limit = "1") InteropLibrary interopLibrary,
87103
@Cached PythonObjectFactory factory,
88104
@Cached PRaiseNode.Lazy raiseNode) {
@@ -121,8 +137,22 @@ static Object doError(@SuppressWarnings("unused") Object o, @SuppressWarnings("u
121137
@CApiBuiltin(ret = Pointer, args = {PyObject, ConstCharPtr}, call = Direct)
122138
abstract static class PyCapsule_GetPointer extends CApiBinaryBuiltinNode {
123139
@Specialization
124-
static Object doCapsule(PyCapsule o, Object name,
140+
static Object doCapsule(Object o, Object name,
125141
@Bind("this") Node inliningTarget,
142+
@Cached PyCapsuleGetPointerNode pyCapsuleGetPointerNode) {
143+
return pyCapsuleGetPointerNode.execute(inliningTarget, o, name);
144+
}
145+
}
146+
147+
@GenerateCached(false)
148+
@GenerateInline
149+
@GenerateUncached
150+
public abstract static class PyCapsuleGetPointerNode extends Node {
151+
152+
public abstract Object execute(Node inliningTarget, Object capsule, Object name);
153+
154+
@Specialization
155+
static Object doCapsule(Node inliningTarget, PyCapsule o, Object name,
126156
@Cached PyCapsuleNameMatchesNode nameMatchesNode,
127157
@Cached PRaiseNode.Lazy raiseNode) {
128158
if (o.getPointer() == null) {
@@ -135,9 +165,9 @@ static Object doCapsule(PyCapsule o, Object name,
135165
}
136166

137167
@Fallback
138-
static Object doError(@SuppressWarnings("unused") Object o, @SuppressWarnings("unused") Object name,
139-
@Cached PRaiseNode raiseNode) {
140-
throw raiseNode.raise(ValueError, CALLED_WITH_INVALID_PY_CAPSULE_OBJECT, "PyCapsule_GetPointer");
168+
static Object doError(Node inliningTarget, @SuppressWarnings("unused") Object o, @SuppressWarnings("unused") Object name,
169+
@Cached PRaiseNode.Lazy raiseNode) {
170+
throw raiseNode.get(inliningTarget).raise(ValueError, CALLED_WITH_INVALID_PY_CAPSULE_OBJECT, "PyCapsule_GetPointer");
141171
}
142172
}
143173

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/module/FrozenModules.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ private static final class Map {
216216
private static final PythonFrozenModule __PHELLO___HAM_EGGS = new PythonFrozenModule("__PHELLO___HAM_EGGS", "__phello__.ham.eggs", false);
217217
private static final PythonFrozenModule __PHELLO___SPAM = new PythonFrozenModule("__PHELLO___SPAM", "__phello__.spam", false);
218218
private static final PythonFrozenModule FROZEN_ONLY = new PythonFrozenModule("FROZEN_ONLY", null, false);
219+
private static final PythonFrozenModule POLYGLOT_ARROW = new PythonFrozenModule("POLYGLOT_ARROW", null, false);
219220
private static final PythonFrozenModule _SYSCONFIGDATA = new PythonFrozenModule("_SYSCONFIGDATA", null, false);
220221
private static final PythonFrozenModule GRAALPY___GRAALPYTHON__ = new PythonFrozenModule("GRAALPY___GRAALPYTHON__", null, false);
221222
private static final PythonFrozenModule GRAALPY__POLYGLOT = new PythonFrozenModule("GRAALPY__POLYGLOT", null, false);
@@ -591,6 +592,8 @@ public static final PythonFrozenModule lookup(String name) {
591592
return Map.__PHELLO___SPAM;
592593
case "__hello_only__":
593594
return Map.FROZEN_ONLY;
595+
case "polyglot.arrow":
596+
return Map.POLYGLOT_ARROW;
594597
case "_sysconfigdata":
595598
return Map._SYSCONFIGDATA;
596599
case "graalpy.__graalpython__":

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/ErrorMessages.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,4 +1665,6 @@ public abstract class ErrorMessages {
16651665
public static final TruffleString STRUCT_ITER_CANNOT_UNPACK_FROM_STRUCT_OF_SIZE_0 = tsLiteral("cannot iteratively unpack with a struct of length 0");
16661666
public static final TruffleString STRUCT_ITER_UNPACK_REQ_A_BUFFER_OF_A_MUL_OF_BYTES = tsLiteral("iterative unpacking requires a buffer of a multiple of %d bytes");
16671667
public static final TruffleString CANNOT_CREATE_P_OBJECTS = tsLiteral("Cannot create %p objects");
1668+
public static final TruffleString ARROW_ARRAY_ALREADY_RELEASED = tsLiteral("Cannot release already released ArrowArray");
1669+
public static final TruffleString ARROW_SCHEMA_ALREADY_RELEASED = tsLiteral("Cannot release already released ArrowSchema");
16681670
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/arrow/ArrowArray.java

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* The Universal Permissive License (UPL), Version 1.0
6+
*
7+
* Subject to the condition set forth below, permission is hereby granted to any
8+
* person obtaining a copy of this software, associated documentation and/or
9+
* data (collectively the "Software"), free of charge and under any and all
10+
* copyright rights in the Software, and any and all patent rights owned or
11+
* freely licensable by each licensor hereunder covering either (i) the
12+
* unmodified Software as contributed to or provided by such licensor, or (ii)
13+
* the Larger Works (as defined below), to deal in both
14+
*
15+
* (a) the Software, and
16+
*
17+
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
18+
* one is included with the Software each a "Larger Work" to which the Software
19+
* is contributed by such licensors),
20+
*
21+
* without restriction, including without limitation the rights to copy, create
22+
* derivative works of, display, perform, and distribute the Software and make,
23+
* use, sell, offer for sale, import, export, have made, and have sold the
24+
* Software and the Larger Work(s), and to sublicense the foregoing rights on
25+
* either these or other terms.
26+
*
27+
* This license is subject to the following condition:
28+
*
29+
* The above copyright notice and either this complete permission notice or at a
30+
* minimum a reference to the UPL must be included in all copies or substantial
31+
* portions of the Software.
32+
*
33+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
* SOFTWARE.
40+
*/
41+
package com.oracle.graal.python.nodes.arrow;
42+
43+
import com.oracle.graal.python.builtins.objects.capsule.PyCapsule;
44+
import com.oracle.graal.python.util.PythonUtils;
45+
import sun.misc.Unsafe;
46+
47+
import static com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.POINTER_SIZE;
48+
49+
/**
50+
* C Data Interface ArrowArray.
51+
* <p>
52+
* Represents a wrapper for the following C structure:
53+
*
54+
* <pre>
55+
* struct ArrowArray {
56+
* // Array data description
57+
* int64_t length; 0
58+
* int64_t null_count; 8
59+
* int64_t offset; 16
60+
* int64_t n_buffers; 24
61+
* int64_t n_children; 32
62+
* const void** buffers; 40
63+
* struct ArrowArray** children; 48
64+
* struct ArrowArray* dictionary; 56
65+
*
66+
* // Release callback
67+
* void (*release)(struct ArrowArray*); 64
68+
* // Opaque producer-specific data
69+
* void* private_data; 72
70+
* };
71+
* </pre>
72+
*/
73+
public class ArrowArray {
74+
75+
// Raw memory accessor used for every read and write of the struct fields below.
private static final Unsafe unsafe = PythonUtils.initUnsafe();
76+
// Capsule name bytes built from "arrow_array".
// NOTE(review): presumably the name under which an ArrowArray pointer is stored in a PyCapsule - confirm at the use sites.
public static final byte[] CAPSULE_NAME = PyCapsule.capsuleName("arrow_array");
77+
78+
// Value written to / compared against the release callback slot to denote "released".
public static final byte NULL = 0;
79+
// Number of bytes allocated per struct: ten 8-byte fields, last one at offset 72 (see the layout in the class comment).
private static final byte SIZE_OF = 80;
80+
81+
// Byte offsets of the struct fields relative to memoryAddr, expressed as multiples of POINTER_SIZE.
// NOTE(review): the int64_t fields are spaced by POINTER_SIZE as well, so these offsets agree with the
// documented layout (and with SIZE_OF = 80) only if POINTER_SIZE is 8 - confirm.
private static final long LENGTH_INDEX = 0;
82+
private static final long NULL_COUNT_INDEX = POINTER_SIZE;
83+
private static final long OFFSET_INDEX = 2 * POINTER_SIZE;
84+
private static final long N_BUFFERS_INDEX = 3 * POINTER_SIZE;
85+
private static final long N_CHILDREN_INDEX = 4 * POINTER_SIZE;
86+
private static final long BUFFERS_INDEX = 5 * POINTER_SIZE;
87+
private static final long CHILDREN_INDEX = 6 * POINTER_SIZE;
88+
private static final long DICTIONARY_INDEX = 7 * POINTER_SIZE;
89+
private static final long RELEASE_CALLBACK_INDEX = 8 * POINTER_SIZE;
90+
private static final long PRIVATE_DATA_INDEX = 9 * POINTER_SIZE;
91+
92+
// Base address of the struct; all field accesses are memoryAddr plus one of the offsets above.
public final long memoryAddr;
93+
94+
// Private: instances are created through allocate(), allocateFromSnapshot() or wrap().
private ArrowArray(long memoryAddr) {
95+
this.memoryAddr = memoryAddr;
96+
}
97+
98+
/**
 * Allocates SIZE_OF bytes and writes NULL into the release callback slot, so the returned
 * instance reports {@link #isReleased()} as true. No other field is written here.
 * Nothing in this class frees the allocation. NOTE(review): presumably freed elsewhere - confirm.
 */
public static ArrowArray allocate() {
99+
var arrowArray = new ArrowArray(unsafe.allocateMemory(SIZE_OF));
100+
arrowArray.markReleased();
101+
return arrowArray;
102+
}
103+
104+
/**
 * Allocates SIZE_OF bytes and fills all ten fields from the given snapshot.
 *
 * @param snapshot field values to write; its {@code release} value decides what {@link #isReleased()} returns
 */
public static ArrowArray allocateFromSnapshot(Snapshot snapshot) {
105+
var arrowArray = new ArrowArray(unsafe.allocateMemory(SIZE_OF));
106+
arrowArray.load(snapshot);
107+
return arrowArray;
108+
}
109+
110+
/**
 * Wraps an existing address without allocating or writing anything.
 *
 * @param arrowArrayPointer address used as {@link #memoryAddr}; it is not validated here
 */
public static ArrowArray wrap(long arrowArrayPointer) {
111+
return new ArrowArray(arrowArrayPointer);
112+
}
113+
114+
/** Writes NULL into the release callback slot; no other field is touched. */
public void markReleased() {
115+
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, NULL);
116+
}
117+
118+
/** Returns true when the release callback slot currently holds NULL (0). */
public boolean isReleased() {
119+
return unsafe.getLong(memoryAddr + RELEASE_CALLBACK_INDEX) == NULL;
120+
}
121+
122+
/** Returns the raw value of the {@code buffers} field (the address of the buffer pointer array). */
public long getBuffers() {
123+
return unsafe.getLong(memoryAddr + BUFFERS_INDEX);
124+
}
125+
126+
/**
 * Returns the value stored one POINTER_SIZE past the start of the buffers array, i.e. buffers[1].
 * NOTE(review): presumably the data buffer that follows a validity buffer at buffers[0] - confirm
 * against the layout of the exported type. The buffers pointer and n_buffers are not checked here.
 */
public long getValueBuffer() {
127+
return unsafe.getLong(getBuffers() + POINTER_SIZE);
128+
}
129+
130+
// Copies every snapshot field into the struct at its offset, in declaration order.
private void load(Snapshot snapshot) {
131+
unsafe.putLong(memoryAddr + LENGTH_INDEX, snapshot.length);
132+
unsafe.putLong(memoryAddr + NULL_COUNT_INDEX, snapshot.null_count);
133+
unsafe.putLong(memoryAddr + OFFSET_INDEX, snapshot.offset);
134+
unsafe.putLong(memoryAddr + N_BUFFERS_INDEX, snapshot.n_buffers);
135+
unsafe.putLong(memoryAddr + N_CHILDREN_INDEX, snapshot.n_children);
136+
unsafe.putLong(memoryAddr + BUFFERS_INDEX, snapshot.buffers);
137+
unsafe.putLong(memoryAddr + CHILDREN_INDEX, snapshot.children);
138+
unsafe.putLong(memoryAddr + DICTIONARY_INDEX, snapshot.dictionary);
139+
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, snapshot.release);
140+
unsafe.putLong(memoryAddr + PRIVATE_DATA_INDEX, snapshot.private_data);
141+
}
142+
143+
/**
 * Mutable holder for the ten struct field values, consumed by {@link #allocateFromSnapshot(Snapshot)}.
 * Field names mirror the C struct members; every field defaults to 0.
 */
public static class Snapshot {
144+
public long length = 0L;
145+
public long null_count = 0L;
146+
public long offset = 0L;
147+
public long n_buffers = 0L;
148+
public long n_children = 0L;
149+
public long buffers = 0L;
150+
public long children = 0L;
151+
public long dictionary = 0L;
152+
// Left at 0, a struct loaded from this snapshot reports isReleased() == true.
public long release = 0L;
153+
public long private_data = 0L;
154+
}
155+
}

0 commit comments

Comments
 (0)