@@ -24,6 +24,7 @@ use arrow::compute::can_cast_types;
2424use arrow:: error:: ArrowError ;
2525use arrow:: ffi:: FFI_ArrowSchema ;
2626use arrow:: ffi_stream:: FFI_ArrowArrayStream ;
27+ use arrow:: pyarrow:: FromPyArrow ;
2728use datafusion:: arrow:: datatypes:: Schema ;
2829use datafusion:: arrow:: pyarrow:: { PyArrowType , ToPyArrow } ;
2930use datafusion:: arrow:: util:: pretty;
@@ -295,6 +296,46 @@ impl PyDataFrame {
295296 pub fn new ( df : DataFrame ) -> Self {
296297 Self { df : Arc :: new ( df) }
297298 }
299+
300+ fn prepare_repr_string ( & self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
301+ // Get the Python formatter and config
302+ let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
303+ let ( batches, has_more) = wait_for_future (
304+ py,
305+ collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
306+ ) ??;
307+ if batches. is_empty ( ) {
308+ // This should not be reached, but do it for safety since we index into the vector below
309+ return Ok ( "No data to display" . to_string ( ) ) ;
310+ }
311+
312+ let table_uuid = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
313+
314+ // Convert record batches to PyObject list
315+ let py_batches = batches
316+ . into_iter ( )
317+ . map ( |rb| rb. to_pyarrow ( py) )
318+ . collect :: < PyResult < Vec < PyObject > > > ( ) ?;
319+
320+ let py_schema = self . schema ( ) . into_pyobject ( py) ?;
321+
322+ let kwargs = pyo3:: types:: PyDict :: new ( py) ;
323+ let py_batches_list = PyList :: new ( py, py_batches. as_slice ( ) ) ?;
324+ kwargs. set_item ( "batches" , py_batches_list) ?;
325+ kwargs. set_item ( "schema" , py_schema) ?;
326+ kwargs. set_item ( "has_more" , has_more) ?;
327+ kwargs. set_item ( "table_uuid" , table_uuid) ?;
328+
329+ let method_name = match as_html {
330+ true => "format_html" ,
331+ false => "format_str" ,
332+ } ;
333+
334+ let html_result = formatter. call_method ( method_name, ( ) , Some ( & kwargs) ) ?;
335+ let html_str: String = html_result. extract ( ) ?;
336+
337+ Ok ( html_str)
338+ }
298339}
299340
300341#[ pymethods]
@@ -321,18 +362,27 @@ impl PyDataFrame {
321362 }
322363
323364 fn __repr__ ( & self , py : Python ) -> PyDataFusionResult < String > {
324- // Get the Python formatter config
325- let PythonFormatter {
326- formatter : _,
327- config,
328- } = get_python_formatter_with_config ( py) ?;
329- let ( batches, has_more) = wait_for_future (
330- py,
331- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
332- ) ??;
365+ self . prepare_repr_string ( py, false )
366+ }
367+
368+ #[ staticmethod]
369+ #[ expect( unused_variables) ]
370+ fn default_str_repr < ' py > (
371+ batches : Vec < Bound < ' py , PyAny > > ,
372+ schema : & Bound < ' py , PyAny > ,
373+ has_more : bool ,
374+ table_uuid : & str ,
375+ ) -> PyResult < String > {
376+ let batches = batches
377+ . into_iter ( )
378+ . map ( |batch| RecordBatch :: from_pyarrow_bound ( & batch) )
379+ . collect :: < PyResult < Vec < RecordBatch > > > ( ) ?
380+ . into_iter ( )
381+ . filter ( |batch| batch. num_rows ( ) > 0 )
382+ . collect :: < Vec < _ > > ( ) ;
383+
333384 if batches. is_empty ( ) {
334- // This should not be reached, but do it for safety since we index into the vector below
335- return Ok ( "No data to display" . to_string ( ) ) ;
385+ return Ok ( "No data to display" . to_owned ( ) ) ;
336386 }
337387
338388 let batches_as_displ =
@@ -347,38 +397,7 @@ impl PyDataFrame {
347397 }
348398
349399 fn _repr_html_ ( & self , py : Python ) -> PyDataFusionResult < String > {
350- // Get the Python formatter and config
351- let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
352- let ( batches, has_more) = wait_for_future (
353- py,
354- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
355- ) ??;
356- if batches. is_empty ( ) {
357- // This should not be reached, but do it for safety since we index into the vector below
358- return Ok ( "No data to display" . to_string ( ) ) ;
359- }
360-
361- let table_uuid = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
362-
363- // Convert record batches to PyObject list
364- let py_batches = batches
365- . into_iter ( )
366- . map ( |rb| rb. to_pyarrow ( py) )
367- . collect :: < PyResult < Vec < PyObject > > > ( ) ?;
368-
369- let py_schema = self . schema ( ) . into_pyobject ( py) ?;
370-
371- let kwargs = pyo3:: types:: PyDict :: new ( py) ;
372- let py_batches_list = PyList :: new ( py, py_batches. as_slice ( ) ) ?;
373- kwargs. set_item ( "batches" , py_batches_list) ?;
374- kwargs. set_item ( "schema" , py_schema) ?;
375- kwargs. set_item ( "has_more" , has_more) ?;
376- kwargs. set_item ( "table_uuid" , table_uuid) ?;
377-
378- let html_result = formatter. call_method ( "format_html" , ( ) , Some ( & kwargs) ) ?;
379- let html_str: String = html_result. extract ( ) ?;
380-
381- Ok ( html_str)
400+ self . prepare_repr_string ( py, true )
382401 }
383402
384403 /// Calculate summary statistics for a DataFrame
0 commit comments