Skip to content

Commit 62d0265

Browse files
committed
refactor: Simplify HTML generation in PyDataFrame by extracting functions
- Replaced inline HTML string construction with dedicated functions for style definitions, table headers, rows, and cell formatting.
- Improved readability and maintainability of the code by modularizing HTML generation logic.
- Enhanced the handling of large cell content with expandable text features.
1 parent d0315ff commit 62d0265

File tree

1 file changed

+148
-100
lines changed

1 file changed

+148
-100
lines changed

src/dataframe.rs

Lines changed: 148 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -152,111 +152,23 @@ impl PyDataFrame {
152152

153153
let table_uuid = uuid::Uuid::new_v4().to_string();
154154

155-
let mut html_str = "
156-
<style>
157-
.expandable-container {
158-
display: inline-block;
159-
max-width: 200px;
160-
}
161-
.expandable {
162-
white-space: nowrap;
163-
overflow: hidden;
164-
text-overflow: ellipsis;
165-
display: block;
166-
}
167-
.full-text {
168-
display: none;
169-
white-space: normal;
170-
}
171-
.expand-btn {
172-
cursor: pointer;
173-
color: blue;
174-
text-decoration: underline;
175-
border: none;
176-
background: none;
177-
font-size: inherit;
178-
display: block;
179-
margin-top: 5px;
180-
}
181-
</style>
182-
183-
<div style=\"width: 100%; max-width: 1000px; max-height: 300px; overflow: auto; border: 1px solid #ccc;\">
184-
<table style=\"border-collapse: collapse; min-width: 100%\">
185-
<thead>\n".to_string();
186-
187-
let schema = batches[0].schema();
155+
let mut html_str = String::new();
156+
html_str.push_str(&get_html_style_definitions());
157+
html_str.push_str(&get_html_table_opening());
188158

189-
let mut header = Vec::new();
190-
for field in schema.fields() {
191-
header.push(format!("<th style='border: 1px solid black; padding: 8px; text-align: left; background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; max-width: fit-content;'>{}</th>", field.name()));
192-
}
193-
let header_str = header.join("");
194-
html_str.push_str(&format!("<tr>{}</tr></thead><tbody>\n", header_str));
195-
196-
let batch_formatters = batches
197-
.iter()
198-
.map(|batch| {
199-
batch
200-
.columns()
201-
.iter()
202-
.map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default()))
203-
.map(|c| {
204-
c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string())))
205-
})
206-
.collect::<Result<Vec<_>, _>>()
207-
})
208-
.collect::<Result<Vec<_>, _>>()?;
159+
html_str.push_str(&get_html_table_header(&batches[0].schema()));
209160

161+
let batch_formatters = create_batch_formatters(&batches)?;
210162
let rows_per_batch = batches.iter().map(|batch| batch.num_rows());
211163

212-
// We need to build up row by row for html
213-
let mut table_row = 0;
214-
for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) {
215-
for batch_row in 0..num_rows_in_batch {
216-
table_row += 1;
217-
let mut cells = Vec::new();
218-
for (col, formatter) in batch_formatter.iter().enumerate() {
219-
let cell_data = formatter.value(batch_row).to_string();
220-
// From testing, primitive data types do not typically get larger than 21 characters
221-
if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE {
222-
let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE];
223-
cells.push(format!("
224-
<td style='border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;'>
225-
<div class=\"expandable-container\">
226-
<span class=\"expandable\" id=\"{table_uuid}-min-text-{table_row}-{col}\">{short_cell_data}</span>
227-
<span class=\"full-text\" id=\"{table_uuid}-full-text-{table_row}-{col}\">{cell_data}</span>
228-
<button class=\"expand-btn\" onclick=\"toggleDataFrameCellText('{table_uuid}',{table_row},{col})\">...</button>
229-
</div>
230-
</td>"));
231-
} else {
232-
cells.push(format!("<td style='border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;'>{}</td>", formatter.value(batch_row)));
233-
}
234-
}
235-
let row_str = cells.join("");
236-
html_str.push_str(&format!("<tr>{}</tr>\n", row_str));
237-
}
238-
}
239-
html_str.push_str("</tbody></table></div>\n");
164+
html_str.push_str(&get_html_table_rows(
165+
&batch_formatters,
166+
rows_per_batch,
167+
&table_uuid,
168+
)?);
240169

241-
html_str.push_str("
242-
<script>
243-
function toggleDataFrameCellText(table_uuid, row, col) {
244-
var shortText = document.getElementById(table_uuid + \"-min-text-\" + row + \"-\" + col);
245-
var fullText = document.getElementById(table_uuid + \"-full-text-\" + row + \"-\" + col);
246-
var button = event.target;
247-
248-
if (fullText.style.display === \"none\") {
249-
shortText.style.display = \"none\";
250-
fullText.style.display = \"inline\";
251-
button.textContent = \"(less)\";
252-
} else {
253-
shortText.style.display = \"inline\";
254-
fullText.style.display = \"none\";
255-
button.textContent = \"...\";
256-
}
257-
}
258-
</script>
259-
");
170+
html_str.push_str("</tbody></table></div>\n");
171+
html_str.push_str(&get_html_js_functions());
260172

261173
if has_more {
262174
html_str.push_str("Data truncated due to size.");
@@ -951,3 +863,139 @@ async fn collect_record_batches_to_display(
951863

952864
Ok((record_batches, has_more))
953865
}
866+
867+
/// Builds the static preamble that precedes the table header.
///
/// The returned markup contains the `<style>` element defining the CSS classes
/// used by expandable cells (`.expandable-container`, `.expandable`,
/// `.full-text`, `.expand-btn`), followed by the opening tags of the scrollable
/// wrapper `<div>` and of the `<table>`. Neither of those two elements is
/// closed here; the caller is expected to emit the matching closing tags.
fn get_html_style_definitions() -> String {
    const PREAMBLE: &str = "
<style>
.expandable-container {
display: inline-block;
max-width: 200px;
}
.expandable {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
display: block;
}
.full-text {
display: none;
white-space: normal;
}
.expand-btn {
cursor: pointer;
color: blue;
text-decoration: underline;
border: none;
background: none;
font-size: inherit;
display: block;
margin-top: 5px;
}
</style>

<div style=\"width: 100%; max-width: 1000px; max-height: 300px; overflow: auto; border: 1px solid #ccc;\">
<table style=\"border-collapse: collapse; min-width: 100%\">
";

    PREAMBLE.to_owned()
}
901+
902+
/// Returns the opening `<thead>` tag followed by a newline.
///
/// Only the table head is opened here; the `<table>` tag itself is part of
/// the markup produced by `get_html_style_definitions`.
fn get_html_table_opening() -> String {
    String::from("<thead>\n")
}
906+
907+
/// Returns the HTML table headers based on the schema
908+
fn get_html_table_header(schema: &Schema) -> String {
909+
let mut header = Vec::new();
910+
for field in schema.fields() {
911+
header.push(format!("<th style='border: 1px solid black; padding: 8px; text-align: left; background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; max-width: fit-content;'>{}</th>", field.name()));
912+
}
913+
let header_str = header.join("");
914+
format!("<tr>{}</tr></thead><tbody>\n", header_str)
915+
}
916+
917+
/// Creates array formatters for each batch
918+
fn create_batch_formatters(
919+
batches: &[RecordBatch],
920+
) -> PyDataFusionResult<Vec<Vec<ArrayFormatter<'_>>>> {
921+
batches
922+
.iter()
923+
.map(|batch| {
924+
batch
925+
.columns()
926+
.iter()
927+
.map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default()))
928+
.map(|c| {
929+
c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string())))
930+
})
931+
.collect::<Result<Vec<_>, _>>()
932+
})
933+
.collect::<Result<Vec<_>, _>>()
934+
.map_err(PyDataFusionError::from)
935+
}
936+
937+
/// Returns the HTML table rows based on the batch formatters
938+
fn get_html_table_rows(
939+
batch_formatters: &[Vec<ArrayFormatter<'_>>],
940+
rows_per_batch: impl Iterator<Item = usize>,
941+
table_uuid: &str,
942+
) -> PyDataFusionResult<String> {
943+
let mut html_str = String::new();
944+
let mut table_row = 0;
945+
946+
for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) {
947+
for batch_row in 0..num_rows_in_batch {
948+
table_row += 1;
949+
let mut cells = Vec::new();
950+
for (col, formatter) in batch_formatter.iter().enumerate() {
951+
let cell_data = formatter.value(batch_row).to_string();
952+
cells.push(format_table_cell(cell_data, table_uuid, table_row, col));
953+
}
954+
let row_str = cells.join("");
955+
html_str.push_str(&format!("<tr>{}</tr>\n", row_str));
956+
}
957+
}
958+
959+
Ok(html_str)
960+
}
961+
962+
/// Formats a single table cell, handling large content with expansion buttons
963+
fn format_table_cell(cell_data: String, table_uuid: &str, table_row: usize, col: usize) -> String {
964+
if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE {
965+
let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE];
966+
format!("
967+
<td style='border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;'>
968+
<div class=\"expandable-container\">
969+
<span class=\"expandable\" id=\"{table_uuid}-min-text-{table_row}-{col}\">{short_cell_data}</span>
970+
<span class=\"full-text\" id=\"{table_uuid}-full-text-{table_row}-{col}\">{cell_data}</span>
971+
<button class=\"expand-btn\" onclick=\"toggleDataFrameCellText('{table_uuid}',{table_row},{col})\">...</button>
972+
</div>
973+
</td>")
974+
} else {
975+
format!("<td style='border: 1px solid black; padding: 8px; text-align: left; white-space: nowrap;'>{}</td>", cell_data)
976+
}
977+
}
978+
979+
/// Returns the `<script>` element defining `toggleDataFrameCellText`.
///
/// The JavaScript function looks up the truncated and full-text `<span>`s of a
/// cell by the ids `<table_uuid>-min-text-<row>-<col>` and
/// `<table_uuid>-full-text-<row>-<col>`, swaps which of the two is visible, and
/// updates the clicked button's label between `...` and `(less)`.
///
/// Visibility is read through `window.getComputedStyle` rather than the
/// element's inline `style`: the full text starts hidden via the `.full-text`
/// CSS class, which leaves the inline `display` empty, so an inline-style check
/// would treat the first click as a collapse and appear to do nothing.
fn get_html_js_functions() -> String {
    "
<script>
function toggleDataFrameCellText(table_uuid, row, col) {
var shortText = document.getElementById(table_uuid + \"-min-text-\" + row + \"-\" + col);
var fullText = document.getElementById(table_uuid + \"-full-text-\" + row + \"-\" + col);
var button = event.target;

if (window.getComputedStyle(fullText).display === \"none\") {
shortText.style.display = \"none\";
fullText.style.display = \"inline\";
button.textContent = \"(less)\";
} else {
shortText.style.display = \"inline\";
fullText.style.display = \"none\";
button.textContent = \"...\";
}
}
</script>
"
    .to_string()
}

0 commit comments

Comments
 (0)