Skip to content

Commit 54b3825

Browse files
committed
feat: add head, tail methods
1 parent cdec202 commit 54b3825

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

python/datafusion/dataframe.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,30 @@ def limit(self, count: int, offset: int = 0) -> DataFrame:
223223
"""
224224
return DataFrame(self.df.limit(count, offset))
225225

226+
def head(self, n: int = 5) -> DataFrame:
    """Return a new :py:class:`DataFrame` holding the first ``n`` rows.

    This is a limit of ``n`` rows applied with an offset of zero.

    Args:
        n: Number of rows to take from the head of the DataFrame.
            Defaults to 5.

    Returns:
        DataFrame after limiting.
    """
    return DataFrame(self.df.limit(n, 0))
236+
237+
def tail(self, n: int = 5) -> DataFrame:
    """Return a new :py:class:`DataFrame` holding the last ``n`` rows.

    Be aware this could be potentially expensive due to the size of the
    frame: the offset is derived from ``self.count()``, which is called
    before the limit is applied.

    Args:
        n: Number of rows to take from the tail of the DataFrame.
            Defaults to 5.

    Returns:
        DataFrame after limiting.
    """
    # Skip every row except the final ``n``. Clamp at zero so that asking
    # for more rows than exist starts from the first row instead of
    # producing a negative offset.
    offset = max(0, self.count() - n)
    return DataFrame(self.df.limit(n, offset))
249+
226250
def collect(self) -> list[pa.RecordBatch]:
227251
"""Execute this :py:class:`DataFrame` and collect results into memory.
228252

python/tests/test_dataframe.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,28 @@ def test_limit_with_offset(df):
190190
assert len(result.column(1)) == 1
191191

192192

193+
def test_head(df):
    """Check that ``head(1)`` keeps only the first row of the fixture."""
    # Execute the limited frame; only the first batch is inspected.
    batch = df.head(1).collect()[0]

    # Expected single-row content, listed in column order.
    expected_columns = (
        pa.array([1]),
        pa.array([4]),
        pa.array([8]),
    )
    for index, expected in enumerate(expected_columns):
        assert batch.column(index) == expected
202+
203+
204+
def test_tail(df):
    """Check that ``tail(1)`` keeps only the last row of the fixture."""
    # Execute the limited frame; only the first batch is inspected.
    batch = df.tail(1).collect()[0]

    # Expected single-row content, listed in column order.
    expected_columns = (
        pa.array([3]),
        pa.array([6]),
        pa.array([8]),
    )
    for index, expected in enumerate(expected_columns):
        assert batch.column(index) == expected
213+
214+
193215
def test_with_column(df):
194216
df = df.with_column("c", column("a") + column("b"))
195217

0 commit comments

Comments
 (0)