Skip to content

Commit f0a0e68

Browse files
authored
Data Explorer: Incorporate specific number types into Ark (#935)
* Incorporate specific number types into Ark * Adapt new convert to code code paths to use specific number types * Fix variables tests
1 parent ba62fab commit f0a0e68

File tree

7 files changed

+107
-34
lines changed

7 files changed

+107
-34
lines changed

crates/amalthea/src/comm/data_explorer_comm.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,19 @@ pub enum ColumnDisplayType {
772772

773773
#[serde(rename = "unknown")]
774774
#[strum(to_string = "unknown")]
775-
Unknown
775+
Unknown,
776+
777+
#[serde(rename = "floating")]
778+
#[strum(to_string = "floating")]
779+
Floating,
780+
781+
#[serde(rename = "integer")]
782+
#[strum(to_string = "integer")]
783+
Integer,
784+
785+
#[serde(rename = "decimal")]
786+
#[strum(to_string = "decimal")]
787+
Decimal
776788
}
777789

778790
/// Possible values for Condition in RowFilter

crates/ark/src/data_explorer/convert_to_code.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,10 @@ fn format_value_for_r(display_type: &ColumnDisplayType, value: &str) -> String {
231231
},
232232

233233
// For numbers, no quotes needed
234-
ColumnDisplayType::Number => value.to_string(),
234+
ColumnDisplayType::Number |
235+
ColumnDisplayType::Integer |
236+
ColumnDisplayType::Floating |
237+
ColumnDisplayType::Decimal => value.to_string(),
235238

236239
// For any other type, default to quoting
237240
_ => escape_character_constant(value),
@@ -616,7 +619,7 @@ mod tests {
616619
];
617620

618621
for (op, expected_op) in test_cases {
619-
let filter = comparison_filter("price", op, "100", ColumnDisplayType::Number);
622+
let filter = comparison_filter("price", op, "100", ColumnDisplayType::Floating);
620623
let result = filter_handler.convert_filter(&filter);
621624
assert_eq!(result, Some(format!("price {} 100", expected_op)));
622625
}
@@ -694,7 +697,7 @@ mod tests {
694697
#[test]
695698
fn test_filter_between() {
696699
let filter_handler = DplyrFilterHandler;
697-
let filter = between_filter("price", "100", "500", ColumnDisplayType::Number);
700+
let filter = between_filter("price", "100", "500", ColumnDisplayType::Floating);
698701

699702
let result = filter_handler.convert_filter(&filter);
700703
assert_eq!(result, Some("price >= 100 & price <= 500".to_string()));
@@ -740,7 +743,7 @@ mod tests {
740743
"price",
741744
FilterComparisonOp::Gt,
742745
"100",
743-
ColumnDisplayType::Number,
746+
ColumnDisplayType::Floating,
744747
),
745748
comparison_filter(
746749
"category",
@@ -765,7 +768,7 @@ mod tests {
765768
"price",
766769
FilterComparisonOp::Gt,
767770
"100",
768-
ColumnDisplayType::Number,
771+
ColumnDisplayType::Floating,
769772
)],
770773
sort_keys: vec![],
771774
code_syntax_name: amalthea::comm::data_explorer_comm::CodeSyntaxName {
@@ -816,7 +819,7 @@ mod tests {
816819
"price",
817820
FilterComparisonOp::Gt,
818821
"100",
819-
ColumnDisplayType::Number,
822+
ColumnDisplayType::Floating,
820823
),
821824
comparison_filter(
822825
"category",
@@ -980,7 +983,7 @@ mod tests {
980983
"2025 score",
981984
FilterComparisonOp::Gt,
982985
"80",
983-
ColumnDisplayType::Number,
986+
ColumnDisplayType::Floating,
984987
)],
985988
sort_keys: vec![],
986989
code_syntax_name: amalthea::comm::data_explorer_comm::CodeSyntaxName {
@@ -1070,7 +1073,7 @@ mod execution_tests {
10701073
column_label: None,
10711074
column_index: 1,
10721075
type_name: "numeric".to_string(),
1073-
type_display: ColumnDisplayType::Number,
1076+
type_display: ColumnDisplayType::Floating,
10741077
description: None,
10751078
children: None,
10761079
precision: None,

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,9 @@ impl RDataExplorer {
788788

789789
let is_compare_supported = |x: &ColumnDisplayType| match x {
790790
ColumnDisplayType::Number |
791+
ColumnDisplayType::Integer |
792+
ColumnDisplayType::Floating |
793+
ColumnDisplayType::Decimal |
791794
ColumnDisplayType::Date |
792795
ColumnDisplayType::Datetime |
793796
ColumnDisplayType::Time => true,

crates/ark/src/data_explorer/summary_stats.rs

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@ pub fn summary_stats(
3939
stats.type_display = display_type;
4040
match stats.type_display {
4141
ColumnDisplayType::Number => {
42-
stats.number_stats = Some(summary_stats_number(column, format_options)?);
42+
stats.number_stats = Some(summary_stats_number(column, format_options, false)?);
43+
},
44+
ColumnDisplayType::Integer => {
45+
stats.number_stats = Some(summary_stats_number(column, format_options, true)?);
46+
},
47+
ColumnDisplayType::Floating => {
48+
stats.number_stats = Some(summary_stats_number(column, format_options, false)?);
49+
},
50+
ColumnDisplayType::Decimal => {
51+
stats.number_stats = Some(summary_stats_number(column, format_options, false)?);
4352
},
4453
ColumnDisplayType::String => {
4554
stats.string_stats = Some(summary_stats_string(column)?);
@@ -59,6 +68,7 @@ pub fn summary_stats(
5968
fn summary_stats_number(
6069
column: SEXP,
6170
format_options: &FormatOptions,
71+
is_integer: bool,
6272
) -> anyhow::Result<SummaryStatsNumber> {
6373
let r_stats = call_summary_fn("summary_stats_number", column)?;
6474

@@ -74,13 +84,38 @@ fn summary_stats_number(
7484
})
7585
.collect();
7686

77-
Ok(SummaryStatsNumber {
78-
min_value: r_stats.get("min_value").cloned(),
79-
max_value: r_stats.get("max_value").cloned(),
80-
mean: r_stats.get("mean").cloned(),
81-
median: r_stats.get("median").cloned(),
82-
stdev: r_stats.get("stdev").cloned(),
83-
})
87+
let min_value = r_stats.get("min_value").cloned();
88+
let max_value = r_stats.get("max_value").cloned();
89+
let mean = r_stats.get("mean").cloned();
90+
let median = r_stats.get("median").cloned();
91+
let stdev = r_stats.get("stdev").cloned();
92+
93+
// For integer types, reformat min/max/median as whole numbers without decimal points.
94+
// Mean and stdev keep their decimal formatting since they can be fractional even for integers.
95+
if is_integer {
96+
let reformat_as_whole_number = |value: Option<String>| -> Option<String> {
97+
value.and_then(|v| {
98+
// Remove thousands separator, parse, and format as integer
99+
v.replace(',', "").parse::<f64>().ok().map(|num| format!("{:.0}", num))
100+
})
101+
};
102+
103+
Ok(SummaryStatsNumber {
104+
min_value: reformat_as_whole_number(min_value),
105+
max_value: reformat_as_whole_number(max_value),
106+
mean,
107+
median: reformat_as_whole_number(median),
108+
stdev,
109+
})
110+
} else {
111+
Ok(SummaryStatsNumber {
112+
min_value,
113+
max_value,
114+
mean,
115+
median,
116+
stdev,
117+
})
118+
}
84119
}
85120

86121
fn summary_stats_string(column: SEXP) -> anyhow::Result<SummaryStatsString> {
@@ -201,7 +236,7 @@ mod tests {
201236
crate::r_task(|| {
202237
let column = harp::parse_eval_global("c(1,2,3,4,5, NA)").unwrap();
203238
let stats =
204-
summary_stats(column.sexp, ColumnDisplayType::Number, &default_options()).unwrap();
239+
summary_stats(column.sexp, ColumnDisplayType::Floating, &default_options()).unwrap();
205240
let expected = SummaryStatsNumber {
206241
min_value: Some("1.00".to_string()),
207242
max_value: Some("5.00".to_string()),
@@ -213,12 +248,29 @@ mod tests {
213248
})
214249
}
215250

251+
#[test]
252+
fn test_integer_summary() {
253+
crate::r_task(|| {
254+
let column = harp::parse_eval_global("c(1L, 2L, 3L, 4L, 5L, NA)").unwrap();
255+
let stats =
256+
summary_stats(column.sexp, ColumnDisplayType::Integer, &default_options()).unwrap();
257+
let expected = SummaryStatsNumber {
258+
min_value: Some("1".to_string()),
259+
max_value: Some("5".to_string()),
260+
mean: Some("3.00".to_string()),
261+
median: Some("3".to_string()),
262+
stdev: Some("1.58".to_string()),
263+
};
264+
assert_eq!(stats.number_stats, Some(expected));
265+
})
266+
}
267+
216268
#[test]
217269
fn test_numeric_all_nas() {
218270
crate::r_task(|| {
219271
let column = harp::parse_eval_global("c(NA_real_, NA_real_, NA_real_)").unwrap();
220272
let stats =
221-
summary_stats(column.sexp, ColumnDisplayType::Number, &default_options()).unwrap();
273+
summary_stats(column.sexp, ColumnDisplayType::Floating, &default_options()).unwrap();
222274
let expected = SummaryStatsNumber {
223275
min_value: None,
224276
max_value: None,

crates/ark/src/data_explorer/utils.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,16 @@ pub fn display_type(x: SEXP) -> ColumnDisplayType {
6767
}
6868

6969
if r_inherits(x, "integer") {
70-
return ColumnDisplayType::Number;
70+
return ColumnDisplayType::Integer;
7171
}
7272
if r_inherits(x, "double") {
73-
return ColumnDisplayType::Number;
73+
return ColumnDisplayType::Floating;
7474
}
7575
if r_inherits(x, "complex") {
76-
return ColumnDisplayType::Number;
76+
return ColumnDisplayType::Floating;
7777
}
7878
if r_inherits(x, "numeric") {
79-
return ColumnDisplayType::Number;
79+
return ColumnDisplayType::Floating;
8080
}
8181

8282
if r_inherits(x, "character") {
@@ -107,7 +107,9 @@ pub fn display_type(x: SEXP) -> ColumnDisplayType {
107107

108108
match r_typeof(x) {
109109
LGLSXP => return ColumnDisplayType::Boolean,
110-
INTSXP | REALSXP | CPLXSXP => return ColumnDisplayType::Number,
110+
INTSXP => return ColumnDisplayType::Integer,
111+
REALSXP => return ColumnDisplayType::Floating,
112+
CPLXSXP => return ColumnDisplayType::Floating,
111113
STRSXP => return ColumnDisplayType::String,
112114
VECSXP => return ColumnDisplayType::Unknown,
113115
_ => return ColumnDisplayType::Unknown,

crates/ark/tests/data_explorer.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2086,7 +2086,7 @@ fn test_schema_identification() {
20862086
assert_eq!(schema.columns.len(), 6);
20872087

20882088
let expected_types = vec![
2089-
(ColumnDisplayType::Number, "dbl"),
2089+
(ColumnDisplayType::Floating, "dbl"),
20902090
(ColumnDisplayType::String, "str"),
20912091
(ColumnDisplayType::Boolean, "lgl"),
20922092
(ColumnDisplayType::String, "fct(3)"),
@@ -2203,7 +2203,7 @@ fn test_search_schema_data_type_filters() {
22032203

22042204
// Test filter for numeric columns: should match age (int) and score (dbl)
22052205
let req = RequestBuilder::search_schema_data_types(
2206-
vec![ColumnDisplayType::Number],
2206+
vec![ColumnDisplayType::Integer, ColumnDisplayType::Floating],
22072207
SearchSchemaSortOrder::Original,
22082208
);
22092209
TestAssertions::assert_search_matches(socket, req, vec![1, 2]);
@@ -2236,9 +2236,9 @@ fn test_search_schema_data_type_filters() {
22362236
);
22372237
TestAssertions::assert_search_matches(socket, req, vec![0, 3]); // name, is_active
22382238

2239-
// Test filter for all numeric-like types: Number and Date
2239+
// Test filter for all numeric-like types: Integer, Floating and Date
22402240
let req = RequestBuilder::search_schema_data_types(
2241-
vec![ColumnDisplayType::Number, ColumnDisplayType::Date],
2241+
vec![ColumnDisplayType::Integer, ColumnDisplayType::Floating, ColumnDisplayType::Date],
22422242
SearchSchemaSortOrder::Original,
22432243
);
22442244
TestAssertions::assert_search_matches(socket, req, vec![1, 2, 4]); // age, score, date_joined
@@ -2352,7 +2352,8 @@ fn test_search_schema_type_sort_orders() {
23522352

23532353
// Test type sorting with filters - only numeric types (int, dbl)
23542354
let filters = vec![FilterBuilder::match_data_types(vec![
2355-
ColumnDisplayType::Number,
2355+
ColumnDisplayType::Integer,
2356+
ColumnDisplayType::Floating,
23562357
])];
23572358

23582359
// Ascending type sort with filter: dbl first, then int
@@ -2943,7 +2944,7 @@ fn test_empty_data_frame_schema() {
29432944
assert_eq!(schema.columns.len(), 6);
29442945

29452946
let expected_types = vec![
2946-
(ColumnDisplayType::Number, "dbl"),
2947+
(ColumnDisplayType::Floating, "dbl"),
29472948
(ColumnDisplayType::String, "str"),
29482949
(ColumnDisplayType::Boolean, "lgl"),
29492950
(ColumnDisplayType::String, "fct(0)"),

crates/ark/tests/variables.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -626,8 +626,8 @@ fn test_query_table_summary() {
626626
assert_eq!(schemas[2]["column_name"], "character_col");
627627
assert_eq!(schemas[3]["column_name"], "logical_col");
628628

629-
assert_eq!(schemas[0]["type_display"], "number");
630-
assert_eq!(schemas[1]["type_display"], "number");
629+
assert_eq!(schemas[0]["type_display"], "floating");
630+
assert_eq!(schemas[1]["type_display"], "integer");
631631
assert_eq!(schemas[2]["type_display"], "string");
632632
assert_eq!(schemas[3]["type_display"], "boolean");
633633

@@ -702,9 +702,9 @@ fn test_query_table_summary() {
702702
assert_eq!(schemas[1]["column_name"], "col2");
703703
assert_eq!(schemas[2]["column_name"], "col3");
704704

705-
// Matrix should have numeric columns
705+
// Matrix created with 1:12 has integer columns
706706
for schema in &schemas {
707-
assert_eq!(schema["type_display"], "number");
707+
assert_eq!(schema["type_display"], "integer");
708708
}
709709
},
710710
_ => panic!("Expected QueryTableSummaryReply"),

0 commit comments

Comments
 (0)