Skip to content

Commit 854404a

Browse files
JonAnClajc-5slukapeschke
authored
feat: release GIL when possible (#388)
* Release Python's GIL when opening an Excel file in read_excel() and when reading a sheet "not eagerly" in ExcelReader.build_sheet
* Also release the GIL in the "eager" case of ExcelReader.build_sheet
* Release the GIL for reading tables too; re-instate reading the range for both the eager and non-eager code paths of ExcelReader.build_sheet
* Update src/types/excelreader/python.rs
* feat: release GIL in to_arrow and to_arrow_with_errors as well

  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* feat: also release the gil when eagerly loading a sheet

  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>

---------

Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
Co-authored-by: jonathan <jonathan.clarke@fivesigma.co.uk>
Co-authored-by: Luka Peschke <luka.peschke@toucantoco.com>
1 parent 0674458 commit 854404a

File tree

3 files changed

+47
-35
lines changed

3 files changed

+47
-35
lines changed

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,15 @@ pub fn read_excel<S: AsRef<str> + Display>(path: S) -> FastExcelResult<ExcelRead
3030
#[cfg(feature = "python")]
3131
/// Reads an excel file and returns an object allowing to access its sheets, tables, and a bit of metadata
3232
#[pyfunction(name = "read_excel")]
33-
fn py_read_excel(source: &Bound<'_, PyAny>) -> PyResult<ExcelReader> {
33+
fn py_read_excel<'py>(source: &Bound<'_, PyAny>, py: Python<'py>) -> PyResult<ExcelReader> {
3434
use py_errors::IntoPyResult;
3535

3636
if let Ok(path) = source.extract::<String>() {
37-
ExcelReader::try_from_path(&path)
37+
py.allow_threads(|| ExcelReader::try_from_path(&path))
3838
.with_context(|| format!("could not load excel file at {path}"))
3939
.into_pyresult()
4040
} else if let Ok(bytes) = source.extract::<&[u8]>() {
41-
ExcelReader::try_from(bytes)
41+
py.allow_threads(|| ExcelReader::try_from(bytes))
4242
.with_context(|| "could not load excel file for those bytes")
4343
.into_pyresult()
4444
} else {

src/types/excelreader/python.rs

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,24 +84,29 @@ impl ExcelReader {
8484
.to_owned();
8585

8686
if eager && self.sheets.supports_by_ref() {
87-
let range = self
88-
.sheets
89-
.with_header_row(calamine_header_row)
90-
.worksheet_range_ref(&sheet_meta.name)
87+
let range = py
88+
.allow_threads(|| {
89+
self.sheets
90+
.with_header_row(calamine_header_row)
91+
.worksheet_range_ref(&sheet_meta.name)
92+
})
9193
.into_pyresult()?;
9294
let pagination =
9395
Pagination::try_new(opts.skip_rows, opts.n_rows, &range).into_pyresult()?;
9496
let header = Header::new(data_header_row, opts.column_names);
95-
let rb = Self::load_sheet_eager(
96-
&range.into(),
97-
pagination,
98-
header,
99-
opts.schema_sample_rows,
100-
&opts.selected_columns,
101-
opts.dtypes.as_ref(),
102-
&opts.dtype_coercion,
103-
)
104-
.into_pyresult()?;
97+
let rb = py
98+
.allow_threads(|| {
99+
Self::load_sheet_eager(
100+
&range.into(),
101+
pagination,
102+
header,
103+
opts.schema_sample_rows,
104+
&opts.selected_columns,
105+
opts.dtypes.as_ref(),
106+
&opts.dtype_coercion,
107+
)
108+
})
109+
.into_pyresult()?;
105110

106111
#[cfg(feature = "pyarrow")]
107112
{
@@ -115,10 +120,12 @@ impl ExcelReader {
115120
))
116121
}
117122
} else {
118-
let range = self
119-
.sheets
120-
.with_header_row(calamine_header_row)
121-
.worksheet_range(&sheet_meta.name)
123+
let range = py
124+
.allow_threads(|| {
125+
self.sheets
126+
.with_header_row(calamine_header_row)
127+
.worksheet_range(&sheet_meta.name)
128+
})
122129
.into_pyresult()?;
123130
let pagination =
124131
Pagination::try_new(opts.skip_rows, opts.n_rows, &range).into_pyresult()?;
@@ -160,7 +167,9 @@ impl ExcelReader {
160167
eager: bool,
161168
py: Python<'py>,
162169
) -> PyResult<Bound<'py, PyAny>> {
163-
let excel_table = self.load_table(name, opts).into_pyresult()?;
170+
let excel_table = py
171+
.allow_threads(|| self.load_table(name, opts))
172+
.into_pyresult()?;
164173

165174
if eager {
166175
#[cfg(feature = "pyarrow")]

src/types/excelsheet/python.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ impl ExcelSheet {
272272

273273
use crate::error::py_errors::IntoPyResult;
274274

275-
RecordBatch::try_from(self)
275+
py.allow_threads(|| RecordBatch::try_from(self))
276276
.with_context(|| {
277277
format!(
278278
"could not create RecordBatch from sheet \"{}\"",
@@ -305,18 +305,21 @@ impl ExcelSheet {
305305
let offset = self.offset();
306306
let limit = self.limit();
307307

308-
let (rb, errors) = record_batch_from_data_and_columns_with_errors(
309-
&self.selected_columns,
310-
self.data(),
311-
offset,
312-
limit,
313-
)
314-
.with_context(|| {
315-
format!(
316-
"could not create RecordBatch from sheet \"{}\"",
317-
self.name()
318-
)
319-
})?;
308+
let (rb, errors) = py
309+
.allow_threads(|| {
310+
record_batch_from_data_and_columns_with_errors(
311+
&self.selected_columns,
312+
self.data(),
313+
offset,
314+
limit,
315+
)
316+
})
317+
.with_context(|| {
318+
format!(
319+
"could not create RecordBatch from sheet \"{}\"",
320+
self.name()
321+
)
322+
})?;
320323

321324
let rb = rb
322325
.into_pyarrow(py)

0 commit comments

Comments
 (0)