Skip to content

Commit 0e6fe51

Browse files
authored
Rename collect to to_memtable (#86)
1 parent 3ed337e commit 0e6fe51

File tree

4 files changed: 16 additions (+16) and 9 deletions (-9)

python/sedonadb/python/sedonadb/dataframe.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -176,17 +176,20 @@ def to_view(self, name: str, overwrite: bool = False):
176176
"""
177177
self._impl.to_view(self._ctx, name, overwrite)
178178

179-
def collect(self) -> "DataFrame":
180-
"""Collect a data frame into memory
179+
def to_memtable(self) -> "DataFrame":
180+
"""Collect a data frame into a memtable
181181
182182
Executes the logical plan represented by this object and returns a
183183
DataFrame representing it.
184184
185+
Does not guarantee ordering of rows. Use `to_arrow_table()` if
186+
ordering is needed.
187+
185188
Examples:
186189
187190
>>> import sedonadb
188191
>>> con = sedonadb.connect()
189-
>>> con.sql("SELECT ST_Point(0, 1) as geom").collect().show()
192+
>>> con.sql("SELECT ST_Point(0, 1) as geom").to_memtable().show()
190193
┌────────────┐
191194
│ geom │
192195
│ geometry │
@@ -195,7 +198,7 @@ def collect(self) -> "DataFrame":
195198
└────────────┘
196199
197200
"""
198-
return DataFrame(self._ctx, self._impl.collect(self._ctx))
201+
return DataFrame(self._ctx, self._impl.to_memtable(self._ctx))
199202

200203
def __datafusion_table_provider__(self):
201204
return self._impl.__datafusion_table_provider__()
@@ -412,7 +415,7 @@ def _scan_default(ctx_impl, obj, schema):
412415

413416

414417
def _scan_collected_default(ctx_impl, obj, schema):
415-
return _scan_default(ctx_impl, obj, schema).collect()
418+
return _scan_default(ctx_impl, obj, schema).to_memtable()
416419

417420

418421
def _scan_geopandas(ctx_impl, obj, schema):

python/sedonadb/python/sedonadb/testing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -324,7 +324,7 @@ def create_or_skip(cls, *args, **kwargs):
324324
return cls(*args, **kwargs)
325325

326326
def create_table_parquet(self, name, paths) -> "SedonaDB":
327-
self.con.read_parquet(paths).collect().to_view(name, overwrite=True)
327+
self.con.read_parquet(paths).to_memtable().to_view(name, overwrite=True)
328328
return self
329329

330330
def create_view_parquet(self, name, paths) -> "SedonaDB":

python/sedonadb/src/dataframe.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,11 @@ impl InternalDataFrame {
111111
Ok(())
112112
}
113113

114-
fn collect<'py>(&self, py: Python<'py>, ctx: &InternalContext) -> Result<Self, PySedonaError> {
114+
fn to_memtable<'py>(
115+
&self,
116+
py: Python<'py>,
117+
ctx: &InternalContext,
118+
) -> Result<Self, PySedonaError> {
115119
let schema = self.inner.schema();
116120
let partitions =
117121
wait_for_future(py, &self.runtime, self.inner.clone().collect_partitioned())??;

python/sedonadb/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,9 +165,9 @@ def test_schema_non_null_crs(con):
165165
assert df.schema.field("geom").type.crs == gat.OGC_CRS84
166166

167167

168-
def test_collect(con):
168+
def test_to_memtable(con):
169169
df = con.sql("SELECT 1 as one")
170-
pd.testing.assert_frame_equal(df.collect().to_pandas(), df.to_pandas())
170+
pd.testing.assert_frame_equal(df.to_memtable().to_pandas(), df.to_pandas())
171171

172172

173173
def test_to_view(con):

0 commit comments

Comments
 (0)