Skip to content

Commit 653ded5

Browse files
authored
Data Explorer: Export column labels to data explorer frontend (#909)
1 parent db6765e commit 653ded5

File tree

3 files changed

+255
-1
lines changed

3 files changed

+255
-1
lines changed

crates/amalthea/src/comm/data_explorer_comm.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ pub struct ColumnSchema {
103103
/// Name of column as UTF-8 string
104104
pub column_name: String,
105105

106+
/// Display label for column (e.g., from R's label attribute)
107+
pub column_label: Option<String>,
108+
106109
/// The position of the column within the table without any column filters
107110
pub column_index: i64,
108111

@@ -690,7 +693,7 @@ pub struct ColumnSelection {
690693
}
691694

692695
/// Possible values for SortOrder in SearchSchema
693-
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display)]
696+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display, strum_macros::EnumString)]
694697
pub enum SearchSchemaSortOrder {
695698
#[serde(rename = "original")]
696699
#[strum(to_string = "original")]

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ use harp::object::RObject;
7474
use harp::r_symbol;
7575
use harp::table_kind;
7676
use harp::tbl_get_column;
77+
use harp::vector::CharacterVector;
78+
use harp::vector::Vector;
7779
use harp::ColumnNames;
7880
use harp::TableKind;
7981
use itertools::Itertools;
@@ -619,8 +621,32 @@ impl RDataExplorer {
619621
let type_name = WorkspaceVariableDisplayType::from(col, false).display_type;
620622
let type_display = display_type(col);
621623

624+
// Get the label attribute if present (for data frames only)
625+
let column_label = match kind {
626+
harp::TableKind::Dataframe => {
627+
let col_obj = harp::RObject::view(col);
628+
col_obj.get_attribute("label").and_then(|label_obj| {
629+
// CharacterVector::new() already checks if it's a STRSXP
630+
CharacterVector::new(label_obj.sexp)
631+
.ok()
632+
.filter(|cv| cv.len() > 0) // Only proceed if non-empty
633+
.and_then(|cv| cv.get_unchecked(0))
634+
.and_then(|label| {
635+
// Filter out empty strings - treat them as no label
636+
if label.trim().is_empty() {
637+
None
638+
} else {
639+
Some(label.to_string())
640+
}
641+
})
642+
})
643+
},
644+
_ => None,
645+
};
646+
622647
column_schemas.push(ColumnSchema {
623648
column_name,
649+
column_label,
624650
column_index: i as i64,
625651
type_name,
626652
type_display,

crates/ark/tests/data_explorer.rs

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,3 +2536,228 @@ fn test_search_schema_edge_cases() {
25362536
);
25372537
TestAssertions::assert_search_matches(socket, req, vec![0, 1, 2]); // All variations
25382538
}
2539+
2540+
/// Checks that a character `label` attribute attached to each column of a
/// data frame is surfaced as `column_label` in the `GetSchemaReply`.
#[test]
fn test_column_labels() {
    let _lock = r_test_lock();

    // Fixture: three numeric columns, each carrying its own "label" attribute.
    r_task(|| {
        harp::parse_eval_global(
            r#"
            df_with_labels <- data.frame(
                age = c(25, 30, 35),
                income = c(50000, 60000, 70000),
                score = c(85.5, 92.0, 88.5)
            )
            attr(df_with_labels$age, "label") <- "Age in years"
            attr(df_with_labels$income, "label") <- "Annual income (USD)"
            attr(df_with_labels$score, "label") <- "Test score percentage"
        "#,
        )
        .unwrap();
    });

    let setup = TestSetup::new("df_with_labels");
    let socket = setup.socket();

    // Expected (column name, column label) pairs, in column order.
    let expected_columns = [
        ("age", "Age in years"),
        ("income", "Annual income (USD)"),
        ("score", "Test score percentage"),
    ];

    // Request the schema for all three columns and compare against the table.
    let req = RequestBuilder::get_schema(vec![0, 1, 2]);
    assert_match!(socket_rpc(socket, req),
        DataExplorerBackendReply::GetSchemaReply(schema) => {
            assert_eq!(schema.columns.len(), expected_columns.len());

            for (column, &(name, label)) in schema.columns.iter().zip(expected_columns.iter()) {
                assert_eq!(column.column_name, name);
                assert_eq!(column.column_label.as_deref(), Some(label));
            }
        }
    );

    // Remove the fixture from the R global environment.
    r_task(|| {
        harp::parse_eval_global("rm(df_with_labels)").unwrap();
    });
}
2589+
2590+
/// Checks that columns without a `label` attribute report `column_label`
/// as `None` in the `GetSchemaReply`.
#[test]
fn test_column_labels_missing() {
    let _lock = r_test_lock();

    // Fixture: a plain data frame whose columns carry no "label" attribute.
    r_task(|| {
        harp::parse_eval_global(
            r#"
            df_no_labels <- data.frame(
                x = 1:3,
                y = 4:6,
                z = 7:9
            )
        "#,
        )
        .unwrap();
    });

    let setup = TestSetup::new("df_no_labels");
    let socket = setup.socket();

    // Column names in the order they were declared in the fixture.
    let expected_names = ["x", "y", "z"];

    // Request the schema; every column must come back unlabelled.
    let req = RequestBuilder::get_schema(vec![0, 1, 2]);
    assert_match!(socket_rpc(socket, req),
        DataExplorerBackendReply::GetSchemaReply(schema) => {
            assert_eq!(schema.columns.len(), expected_names.len());

            for (column, &name) in schema.columns.iter().zip(expected_names.iter()) {
                assert_eq!(column.column_name, name);
                assert_eq!(column.column_label, None);
            }
        }
    );

    // Remove the fixture from the R global environment.
    r_task(|| {
        harp::parse_eval_global("rm(df_no_labels)").unwrap();
    });
}
2634+
2635+
/// Checks that column labels are reported both for a plain vector carrying a
/// `label` attribute and for a `haven_labelled` column.
///
/// When the haven package is installed the labelled column is built with
/// `haven::labelled()`; otherwise the fixture imitates it by setting the
/// `label` attribute and the `haven_labelled` class by hand, so the test runs
/// either way.
#[test]
fn test_column_labels_haven_compatibility() {
    let _lock = r_test_lock();

    r_task(|| {
        harp::parse_eval_global(
            r#"
            # Use requireNamespace() rather than require(): the fixture only
            # needs `haven::labelled`, and require() would attach haven to the
            # search path of the R session for the rest of the run.
            if (requireNamespace("haven", quietly = TRUE)) {
                df_haven <- data.frame(
                    basic = 1:3,
                    labelled_var = haven::labelled(c(1, 2, 3), label = "Labelled numeric variable")
                )
                # Also add a regular label attribute for comparison
                attr(df_haven$basic, "label") <- "Basic variable with regular label"
            } else {
                # Fallback: create a data frame that simulates haven::labelled behavior
                df_haven <- data.frame(
                    basic = 1:3,
                    labelled_var = c(1, 2, 3)
                )
                attr(df_haven$basic, "label") <- "Basic variable with regular label"
                attr(df_haven$labelled_var, "label") <- "Labelled numeric variable"
                class(df_haven$labelled_var) <- c("haven_labelled", "vctrs_vctr", "double")
            }
        "#,
        )
        .unwrap();
    });

    let setup = TestSetup::new("df_haven");
    let socket = setup.socket();

    // Get schema and verify column labels work with both regular and haven labelled columns
    let req = RequestBuilder::get_schema(vec![0, 1]);
    assert_match!(socket_rpc(socket, req),
        DataExplorerBackendReply::GetSchemaReply(schema) => {
            assert_eq!(schema.columns.len(), 2);

            // Plain integer column with a hand-set label attribute
            assert_eq!(schema.columns[0].column_name, "basic");
            assert_eq!(
                schema.columns[0].column_label,
                Some("Basic variable with regular label".to_string())
            );

            // haven::labelled column (or its hand-built stand-in)
            assert_eq!(schema.columns[1].column_name, "labelled_var");
            assert_eq!(
                schema.columns[1].column_label,
                Some("Labelled numeric variable".to_string())
            );
        }
    );

    // Clean up: the fixture defines only `df_haven`
    r_task(|| {
        harp::parse_eval_global("rm(df_haven)").unwrap();
    });
}
2692+
2693+
/// Checks how unusual `label` attributes are mapped to `column_label`:
///
/// - a normal string is passed through,
/// - an empty string and a whitespace-only string are treated as no label,
/// - a non-character (numeric) label is ignored,
/// - a multi-element character label yields its first element,
/// - a column that never had a label yields `None`.
#[test]
fn test_column_labels_edge_cases() {
    let _lock = r_test_lock();

    r_task(|| {
        harp::parse_eval_global(
            r#"
            df_edge_cases <- data.frame(
                normal = c(1, 2, 3),
                empty_label = c(4, 5, 6),
                numeric_label = c(7, 8, 9),
                multiple_labels = c(10, 11, 12),
                null_label = c(13, 14, 15),
                whitespace_label = c(16, 17, 18)
            )

            # Normal case
            attr(df_edge_cases$normal, "label") <- "Normal label"

            # Empty string label (should result in None)
            attr(df_edge_cases$empty_label, "label") <- ""

            # Numeric label (not a character vector, so it should be ignored)
            attr(df_edge_cases$numeric_label, "label") <- 42

            # Multiple character labels (should take first one)
            attr(df_edge_cases$multiple_labels, "label") <- c("First label", "Second label")

            # NULL label: assigning NULL leaves the column without a label
            # attribute (should result in None)
            attr(df_edge_cases$null_label, "label") <- NULL

            # Whitespace-only label (should result in None, like an empty one)
            attr(df_edge_cases$whitespace_label, "label") <- "   "
        "#,
        )
        .unwrap();
    });

    let setup = TestSetup::new("df_edge_cases");
    let socket = setup.socket();

    // Get schema and verify edge cases are handled correctly
    let req = RequestBuilder::get_schema(vec![0, 1, 2, 3, 4, 5]);
    assert_match!(socket_rpc(socket, req),
        DataExplorerBackendReply::GetSchemaReply(schema) => {
            assert_eq!(schema.columns.len(), 6);

            // Normal case
            assert_eq!(schema.columns[0].column_name, "normal");
            assert_eq!(schema.columns[0].column_label, Some("Normal label".to_string()));

            // Empty label should be treated as no label (None)
            assert_eq!(schema.columns[1].column_name, "empty_label");
            assert_eq!(schema.columns[1].column_label, None);

            // Numeric label should be ignored (None)
            assert_eq!(schema.columns[2].column_name, "numeric_label");
            assert_eq!(schema.columns[2].column_label, None);

            // Multiple labels should take the first one
            assert_eq!(schema.columns[3].column_name, "multiple_labels");
            assert_eq!(schema.columns[3].column_label, Some("First label".to_string()));

            // NULL label should be None
            assert_eq!(schema.columns[4].column_name, "null_label");
            assert_eq!(schema.columns[4].column_label, None);

            // Whitespace-only label should be treated as no label (None)
            assert_eq!(schema.columns[5].column_name, "whitespace_label");
            assert_eq!(schema.columns[5].column_label, None);
        }
    );

    // Clean up
    r_task(|| {
        harp::parse_eval_global("rm(df_edge_cases)").unwrap();
    });
}

0 commit comments

Comments
 (0)