@@ -73,9 +73,6 @@ impl PyTableProvider {
         PyTable::new(table_provider)
     }
 }
-const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB
-const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20;
-const MAX_LENGTH_CELL_WITHOUT_MINIMIZE: usize = 25;
 
 /// A PyDataFrame is a representation of a logical plan and an API to compose statements.
 /// Use it to build a plan and `.collect()` to execute the plan and collect the result.
@@ -84,15 +81,15 @@ const MAX_LENGTH_CELL_WITHOUT_MINIMIZE: usize = 25;
 #[derive(Clone)]
 pub struct PyDataFrame {
     df: Arc<DataFrame>,
-    display_config: PyDataframeDisplayConfig,
+    display_config: Arc<PyDataframeDisplayConfig>,
 }
 
 impl PyDataFrame {
     /// creates a new PyDataFrame
     pub fn new(df: DataFrame, display_config: PyDataframeDisplayConfig) -> Self {
         Self {
             df: Arc::new(df),
-            display_config: Arc::new(display_config),
         }
     }
 }
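
For orientation, here is a minimal sketch of what the `PyDataframeDisplayConfig` referenced above might look like. The field names (`min_table_rows`, `max_table_rows_in_repr`, `max_table_bytes`, `max_cell_length`) are inferred from their usages in this diff; the struct definition and the example values are assumptions, not code from this commit — the example simply reuses the constants the commit removes.

```rust
// Hypothetical sketch: field names inferred from usages in this diff,
// not the actual definition from this commit.
#[derive(Clone, Debug)]
pub struct PyDataframeDisplayConfig {
    pub min_table_rows: usize,         // always render at least this many rows
    pub max_table_rows_in_repr: usize, // row cap for __repr__ output
    pub max_table_bytes: usize,        // byte budget for collected batches
    pub max_cell_length: usize,        // truncate cells longer than this
}

fn example(df: DataFrame) -> PyDataFrame {
    // Example values mirroring the removed constants and the old
    // hardcoded `10` in __repr__.
    let config = PyDataframeDisplayConfig {
        min_table_rows: 20,               // was MIN_TABLE_ROWS_TO_DISPLAY
        max_table_rows_in_repr: 10,       // was hardcoded in __repr__
        max_table_bytes: 2 * 1024 * 1024, // was MAX_TABLE_BYTES_TO_DISPLAY
        max_cell_length: 25,              // was MAX_LENGTH_CELL_WITHOUT_MINIMIZE
    };
    PyDataFrame::new(df, config)
}
```
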
@@ -121,9 +118,23 @@ impl PyDataFrame {
     }
 
     fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
+        // Pull the display limits from the dataframe's configuration rather
+        // than the former module-level constants, so each PyDataFrame can
+        // carry its own repr settings.
+        let min_rows = self.display_config.min_table_rows;
+        let max_rows = self.display_config.max_table_rows_in_repr;
+        let max_bytes = self.display_config.max_table_bytes;
+
+        // Collect just enough record batches to render, bounded by both the
+        // row limits and the byte budget.
         let (batches, has_more) = wait_for_future(
             py,
-            collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10),
+            collect_record_batches_to_display(
+                self.df.as_ref().clone(),
+                min_rows,
+                max_rows,
+                max_bytes,
+            ),
         )?;
         if batches.is_empty() {
             // This should not be reached, but do it for safety since we index into the vector below
@@ -146,8 +157,9 @@ impl PyDataFrame {
             py,
             collect_record_batches_to_display(
                 self.df.as_ref().clone(),
-                MIN_TABLE_ROWS_TO_DISPLAY,
-                usize::MAX,
+                self.display_config.min_table_rows,
+                self.display_config.max_table_rows_in_repr,
+                self.display_config.max_table_bytes,
             ),
         )?;
         if batches.is_empty() {
@@ -223,8 +235,8 @@ impl PyDataFrame {
             for (col, formatter) in batch_formatter.iter().enumerate() {
                 let cell_data = formatter.value(batch_row).to_string();
                 // From testing, primitive data types do not typically get larger than 21 characters
-                if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE {
-                    let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE];
+                if cell_data.len() > self.display_config.max_cell_length {
+                    let short_cell_data = &cell_data[0..self.display_config.max_cell_length];
                     cells.push(format!("
                     <td style='border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;'>
                         <div class=\"expandable-container\">
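
One caveat on the hunk above, carried over from the constant-based code it replaces: `&cell_data[0..max_cell_length]` slices by byte offset, so it panics if the cut lands inside a multi-byte UTF-8 character. A char-boundary-safe variant, offered only as a sketch (the helper name is hypothetical):

```rust
// Hypothetical helper: truncate to at most `max_chars` characters without
// ever splitting a multi-byte UTF-8 sequence.
fn truncate_cell(cell_data: &str, max_chars: usize) -> &str {
    match cell_data.char_indices().nth(max_chars) {
        Some((byte_idx, _)) => &cell_data[..byte_idx], // cut at a char boundary
        None => cell_data,                             // already short enough
    }
}
```
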
@@ -891,6 +903,7 @@ async fn collect_record_batches_to_display(
     df: DataFrame,
     min_rows: usize,
     max_rows: usize,
+    max_bytes: usize,
 ) -> Result<(Vec<RecordBatch>, bool), DataFusionError> {
     let partitioned_stream = df.execute_stream_partitioned().await?;
     let mut stream = futures::stream::iter(partitioned_stream).flatten();
@@ -899,7 +912,7 @@ async fn collect_record_batches_to_display(
     let mut record_batches = Vec::default();
     let mut has_more = false;
 
-    while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows)
+    while (size_estimate_so_far < max_bytes && rows_so_far < max_rows)
         || rows_so_far < min_rows
     {
         let mut rb = match stream.next().await {
@@ -914,8 +927,8 @@ async fn collect_record_batches_to_display(
         if rows_in_rb > 0 {
             size_estimate_so_far += rb.get_array_memory_size();
 
-            if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY {
-                let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32;
+            if size_estimate_so_far > max_bytes {
+                let ratio = max_bytes as f32 / size_estimate_so_far as f32;
                 let total_rows = rows_in_rb + rows_so_far;
 
                 let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize;
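
The arithmetic in this last hunk shrinks the row count in proportion to how far the size estimate overshoots the byte budget. A self-contained sketch of the same calculation (the function name and the worked example are illustrative, not from this commit):

```rust
// Keep roughly the fraction of rows that fits within the byte budget.
fn rows_within_budget(total_rows: usize, estimated_bytes: usize, max_bytes: usize) -> usize {
    if estimated_bytes <= max_bytes {
        return total_rows; // everything fits; nothing to trim
    }
    let ratio = max_bytes as f32 / estimated_bytes as f32;
    (total_rows as f32 * ratio).round() as usize
}

#[test]
fn halves_rows_when_estimate_doubles_budget() {
    // 1000 rows estimated at 4 MiB against a 2 MiB budget keeps 500 rows.
    assert_eq!(rows_within_budget(1000, 4 * 1024 * 1024, 2 * 1024 * 1024), 500);
}
```
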