Commit 8f674f1

Add patch for pyspark
1 parent 581940b commit 8f674f1

2 files changed: +101 -0 lines changed
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
[[rules]]
version = '== 3.4.1'
dist-type = 'sdist'
patch = 'pyspark-3.4.1.patch'
Lines changed: 97 additions & 0 deletions (the pyspark-3.4.1.patch file referenced by the rule above)
@@ -0,0 +1,97 @@
diff --git a/pyspark/cloudpickle/cloudpickle.py b/pyspark/cloudpickle/cloudpickle.py
index 317be69..4e409e4 100644
--- a/pyspark/cloudpickle/cloudpickle.py
+++ b/pyspark/cloudpickle/cloudpickle.py
@@ -512,6 +512,8 @@ def _walk_global_ops(code):
     """
     Yield referenced name for all global-referencing instructions in *code*.
     """
+    # GraalPy change: we don't support dis
+    yield from code.co_names
     for instr in dis.get_instructions(code):
         op = instr.opcode
         if op in GLOBAL_OPS:
diff --git a/pyspark/cloudpickle/cloudpickle_fast.py b/pyspark/cloudpickle/cloudpickle_fast.py
index 63aaffa..4ee14fd 100644
--- a/pyspark/cloudpickle/cloudpickle_fast.py
+++ b/pyspark/cloudpickle/cloudpickle_fast.py
@@ -663,7 +663,7 @@ class CloudPickler(Pickler):
         self.globals_ref = {}
         assert hasattr(self, 'proto')
 
-    if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY:
+    if pickle.HIGHEST_PROTOCOL >= 5 and not hasattr(Pickler, 'dispatch'):
         # Pickler is the C implementation of the CPython pickler and therefore
         # we rely on reduce_override method to customize the pickler behavior.
 
diff --git a/pyspark/conf.py b/pyspark/conf.py
index 1ddc8f5..4f0828f 100644
--- a/pyspark/conf.py
+++ b/pyspark/conf.py
@@ -135,6 +135,10 @@ class SparkConf:
                 self._jconf = None
                 self._conf = {}
 
+        import sys
+        if sys.implementation.name == 'graalpy':
+            self.set('spark.python.use.daemon', False)
+
     def set(self, key: str, value: str) -> "SparkConf":
         """Set a configuration property."""
         # Try to set self._jconf first if JVM is created, set self._conf if JVM is not created yet.
diff --git a/pyspark/java_gateway.py b/pyspark/java_gateway.py
index aee206d..b1cc575 100644
--- a/pyspark/java_gateway.py
+++ b/pyspark/java_gateway.py
@@ -92,7 +92,8 @@ def launch_gateway(conf=None, popen_kwargs=None):
             def preexec_func():
                 signal.signal(signal.SIGINT, signal.SIG_IGN)
 
-            popen_kwargs["preexec_fn"] = preexec_func
+            # GraalPy change: we don't support preexec_fn
+            # popen_kwargs["preexec_fn"] = preexec_func
             proc = Popen(command, **popen_kwargs)
         else:
             # preexec_fn not supported on Windows
diff --git a/pyspark/worker.py b/pyspark/worker.py
index cd5bb64..0bd7ac7 100644
--- a/pyspark/worker.py
+++ b/pyspark/worker.py
@@ -887,4 +887,7 @@ if __name__ == "__main__":
     # TODO: Remove thw following two lines and use `Process.pid()` when we drop JDK 8.
     write_int(os.getpid(), sock_file)
     sock_file.flush()
-    main(sock_file, sock_file)
+    try:
+        main(sock_file, sock_file)
+    finally:
+        sock_file.close()
diff --git a/setup.py b/setup.py
index ead1139..825f6c9 100755
--- a/setup.py
+++ b/setup.py
@@ -222,6 +222,24 @@ try:
     with open("README.md") as f:
         long_description = f.read()
 
+    graalpy_marker = 'graalpy-repacked-zips'
+    if not os.path.exists(graalpy_marker):
+        import pathlib
+        import shutil
+        import tempfile
+        with tempfile.TemporaryDirectory() as tmp:
+            target = pathlib.Path(tmp)
+            shutil.unpack_archive('lib/pyspark.zip', target)
+            for f in pathlib.Path('pyspark').glob('**/*.py'):
+                dst = target / f
+                if dst.is_file():
+                    shutil.copy(f, dst)
+            shutil.make_archive('lib/pyspark', 'zip', target)
+
+
+        with open(graalpy_marker, 'w'):
+            pass
+
     setup(
         name="pyspark",
         version=VERSION,
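
A note on the cloudpickle.py hunk: when bytecode disassembly through the dis
module is not available, the added lines fall back to code.co_names, which
lists every global and attribute name a code object references. That is a
superset of what the dis-based walk finds, so at worst a few extra globals get
captured. A quick standard-library illustration:

    def f():
        return len(data)  # both len and data resolve as globals here

    # co_names holds all global/attribute names referenced by the code
    # object, with no need to disassemble the bytecode.
    print(f.__code__.co_names)  # ('len', 'data')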

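
A note on the cloudpickle_fast.py hunk: upstream tests "not PYPY" to decide
whether Pickler is CPython's C accelerator, which misclassifies GraalPy. The
patch swaps that interpreter check for a feature check: only the pure-Python
pickler class carries a class-level dispatch table. With the standard library:

    import pickle

    # The pure-Python pickler defines a class-level 'dispatch' dict;
    # the C accelerator does not.
    print(hasattr(pickle._Pickler, 'dispatch'))  # True (pure Python)
    print(hasattr(pickle.Pickler, 'dispatch'))   # False on CPython, where
                                                 # Pickler is the C pickler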
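
A note on the conf.py hunk: PySpark's daemon-based worker model forks Python
worker processes via os.fork, which GraalPy does not support, so the patch
turns spark.python.use.daemon off whenever it detects GraalPy. The detection
is a single standard-library check; a minimal sketch, with the print standing
in for the patch's self.set call:

    import sys

    # sys.implementation.name identifies the running interpreter:
    # 'cpython' on CPython, 'graalpy' on GraalPy.
    if sys.implementation.name == 'graalpy':
        print('GraalPy detected: disabling the PySpark worker daemon')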
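
A note on the setup.py hunk: the pyspark sdist bundles a second copy of the
Python sources inside lib/pyspark.zip, so patching the .py files on disk alone
would not take effect everywhere. The hunk unpacks that zip into a temporary
directory, overlays every patched file that already exists in the archive, and
repacks it; the graalpy-repacked-zips marker file keeps the step from running
a second time. The same overlay pattern in isolation, using hypothetical paths
(archive lib/example.zip, patched tree src/):

    import pathlib
    import shutil
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        target = pathlib.Path(tmp)
        shutil.unpack_archive('lib/example.zip', target)   # explode the zip
        for src in pathlib.Path('src').glob('**/*.py'):
            dst = target / src                   # mirror the relative path
            if dst.is_file():                    # overlay only files that
                shutil.copy(src, dst)            # already exist in the zip
        shutil.make_archive('lib/example', 'zip', target)  # rebuild in place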