Skip to content

Commit b3c62ef

Browse files
BUG: gpu dataframe iloc (#419)
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
1 parent b0d0786 commit b3c62ef

File tree

2 files changed

+33
-24
lines changed

2 files changed

+33
-24
lines changed

python/xorbits/_mars/dataframe/indexing/iloc.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def tile(cls, op):
541541
return [(yield from handler.handle(op))]
542542

543543
@classmethod
544-
def execute(cls, ctx, op):
544+
def execute(cls, ctx, op: "SeriesIlocGetItem"):
545545
chunk = op.outputs[0]
546546
series = ctx[op.input.key]
547547
if len(op.inputs) > 1:
@@ -551,6 +551,8 @@ def execute(cls, ctx, op):
551551
)
552552
else:
553553
indexes = tuple(op.indexes)
554+
if op.is_gpu() and len(indexes) == 1:
555+
indexes = indexes[0]
554556
if hasattr(series, "iloc"):
555557
ctx[chunk.key] = series.iloc[indexes]
556558
else:

python/xorbits/_mars/dataframe/indexing/tests/test_indexing_execution.py

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,14 @@ def test_set_index(setup, chunk_size):
6565
pd.testing.assert_frame_equal(expected, df2.execute().fetch())
6666

6767

68-
def test_iloc_getitem(setup):
68+
@support_cuda
69+
def test_iloc_getitem(setup_gpu, gpu):
6970
df1 = pd.DataFrame(
7071
[[1, 3, 3], [4, 2, 6], [7, 8, 9]],
7172
index=["a1", "a2", "a3"],
7273
columns=["x", "y", "z"],
7374
)
74-
df2 = md.DataFrame(df1, chunk_size=2)
75+
df2 = md.DataFrame(df1, chunk_size=2, gpu=gpu)
7576

7677
# plain index
7778
expected = df1.iloc[1]
@@ -125,65 +126,71 @@ def test_iloc_getitem(setup):
125126
pd.testing.assert_frame_equal(expected, df12.execute().fetch())
126127

127128
# bool index array on axis 1
128-
expected = df1.iloc[[2, 1], [True, False, True]]
129-
df14 = df2.iloc[[2, 1], [True, False, True]]
130-
pd.testing.assert_frame_equal(expected, df14.execute().fetch())
129+
if not gpu:
130+
# TODO: skipped due to a cuDF boolean indexing issue.
131+
expected = df1.iloc[[2, 1], [True, False, True]]
132+
df14 = df2.iloc[[2, 1], [True, False, True]]
133+
pd.testing.assert_frame_equal(expected, df14.execute().fetch())
131134

132135
# bool index
133-
expected = df1.iloc[[True, False, True], [2, 1]]
134-
df13 = df2.iloc[md.Series([True, False, True], chunk_size=1), [2, 1]]
135-
pd.testing.assert_frame_equal(expected, df13.execute().fetch())
136+
if not gpu:
137+
# TODO: skipped due to a cuDF boolean indexing issue.
138+
expected = df1.iloc[[True, False, True], [2, 1]]
139+
df13 = df2.iloc[md.Series([True, False, True], chunk_size=1, gpu=gpu), [2, 1]]
140+
pd.testing.assert_frame_equal(expected, df13.execute().fetch())
136141

137142
# test Series
138143
data = pd.Series(np.arange(10))
139-
series = md.Series(data, chunk_size=3).iloc[:3]
144+
series = md.Series(data, chunk_size=3, gpu=gpu).iloc[:3]
140145
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[:3])
141146

142-
series = md.Series(data, chunk_size=3).iloc[4]
147+
series = md.Series(data, chunk_size=3, gpu=gpu).iloc[4]
143148
assert series.execute().fetch() == data.iloc[4]
144149

145-
series = md.Series(data, chunk_size=3).iloc[[2, 3, 4, 9]]
150+
series = md.Series(data, chunk_size=3, gpu=gpu).iloc[[2, 3, 4, 9]]
146151
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[[2, 3, 4, 9]])
147152

148-
series = md.Series(data, chunk_size=3).iloc[[4, 3, 9, 2]]
153+
series = md.Series(data, chunk_size=3, gpu=gpu).iloc[[4, 3, 9, 2]]
149154
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[[4, 3, 9, 2]])
150155

151-
series = md.Series(data).iloc[5:]
156+
series = md.Series(data, gpu=gpu).iloc[5:]
152157
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[5:])
153158

154159
# bool index array
155160
selection = np.random.RandomState(0).randint(2, size=10, dtype=bool)
156-
series = md.Series(data).iloc[selection]
161+
series = md.Series(data, gpu=gpu).iloc[selection]
157162
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[selection])
158163

159164
# bool index
160-
series = md.Series(data).iloc[md.Series(selection, chunk_size=4)]
165+
series = md.Series(data, gpu=gpu).iloc[md.Series(selection, chunk_size=4, gpu=gpu)]
161166
pd.testing.assert_series_equal(series.execute().fetch(), data.iloc[selection])
162167

163168
# test index
164169
data = pd.Index(np.arange(10))
165-
index = md.Index(data, chunk_size=3)[:3]
170+
index = md.Index(data, chunk_size=3, gpu=gpu)[:3]
166171
pd.testing.assert_index_equal(index.execute().fetch(), data[:3])
167172

168-
index = md.Index(data, chunk_size=3)[4]
173+
index = md.Index(data, chunk_size=3, gpu=gpu)[4]
169174
assert index.execute().fetch() == data[4]
170175

171-
index = md.Index(data, chunk_size=3)[[2, 3, 4, 9]]
176+
index = md.Index(data, chunk_size=3, gpu=gpu)[[2, 3, 4, 9]]
172177
pd.testing.assert_index_equal(index.execute().fetch(), data[[2, 3, 4, 9]])
173178

174-
index = md.Index(data, chunk_size=3)[[4, 3, 9, 2]]
179+
index = md.Index(data, chunk_size=3, gpu=gpu)[[4, 3, 9, 2]]
175180
pd.testing.assert_index_equal(index.execute().fetch(), data[[4, 3, 9, 2]])
176181

177-
index = md.Index(data)[5:]
182+
index = md.Index(data, gpu=gpu)[5:]
178183
pd.testing.assert_index_equal(index.execute().fetch(), data[5:])
179184

180185
# bool index array
181186
selection = np.random.RandomState(0).randint(2, size=10, dtype=bool)
182-
index = md.Index(data)[selection]
187+
index = md.Index(data, gpu=gpu)[selection]
183188
pd.testing.assert_index_equal(index.execute().fetch(), data[selection])
184189

185-
index = md.Index(data)[mt.tensor(selection, chunk_size=4)]
186-
pd.testing.assert_index_equal(index.execute().fetch(), data[selection])
190+
if not gpu:
191+
# TODO: skipped due to a cuDF boolean indexing issue.
192+
index = md.Index(data, gpu=gpu)[mt.tensor(selection, chunk_size=4, gpu=gpu)]
193+
pd.testing.assert_index_equal(index.execute().fetch(), data[selection])
187194

188195

189196
def test_iloc_setitem(setup):

0 commit comments

Comments
 (0)