Skip to content

Commit c2c981a

Browse files
authored
data explorer: set edges to value of single bin (#884)
* set edges to value of single bin
* update changed behaviour
1 parent 0607577 commit c2c981a

File tree

3 files changed

+58
-2
lines changed

3 files changed

+58
-2
lines changed

crates/ark/src/data_explorer/histogram.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,8 @@ mod tests {
379379
fn test_constant_column() {
380380
r_task(|| {
381381
// A constant column yields a single bin containing all values; its edges both equal that value rather than the default `hist` range.
382-
test_histogram("c(1, 1, 1)", 4, vec!["0.00", "1.00"], vec![3]);
383-
test_histogram_method("c(1, 1, 1)", "sturges", vec!["0.00", "1.00"], vec![3])
382+
test_histogram("c(1, 1, 1)", 4, vec!["1.00", "1.00"], vec![3]);
383+
test_histogram_method("c(1, 1, 1)", "sturges", vec!["1.00", "1.00"], vec![3])
384384
})
385385
}
386386

crates/ark/src/modules/positron/r_data_explorer.R

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,17 @@ profile_histogram <- function(
473473
bin_edges <- h$breaks
474474
bin_counts <- h$counts
475475

476+
# Special case: if there is a single bin, check whether all values are identical.
477+
# If so, collapse both bin edges to that value instead of keeping the wider range in `h$breaks` (e.g. [0, 1] for a constant 1).
478+
if (length(bin_counts) == 1 && length(x) > 0) {
479+
# Check if all values are the same
480+
unique_values <- unique(x)
481+
if (length(unique_values) == 1) {
482+
# All values are the same, set bin edges to [value, value]
483+
bin_edges <- c(unique_values[1], unique_values[1])
484+
}
485+
}
486+
476487
# For dates, we convert back the breaks to the date representation.
477488
if (inherits(x, "POSIXct")) {
478489
# Must supply an `origin` on R <= 4.2

crates/ark/tests/data_explorer.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,6 +1986,51 @@ fn test_histogram() {
19861986
});
19871987
}
19881988

1989+
#[test]
1990+
fn test_histogram_single_bin_same_values() {
1991+
let _lock = r_test_lock();
1992+
1993+
let socket = open_data_explorer_from_expression("data.frame(x = rep(5, 10))", None).unwrap();
1994+
1995+
let make_histogram_req = |id, column_index, method, num_bins, quantiles| {
1996+
DataExplorerBackendRequest::GetColumnProfiles(GetColumnProfilesParams {
1997+
callback_id: id,
1998+
profiles: vec![ColumnProfileRequest {
1999+
column_index,
2000+
profiles: vec![ColumnProfileSpec {
2001+
profile_type: ColumnProfileType::SmallHistogram,
2002+
params: Some(ColumnProfileParams::SmallHistogram(ColumnHistogramParams {
2003+
method,
2004+
num_bins,
2005+
quantiles,
2006+
})),
2007+
}],
2008+
}],
2009+
format_options: default_format_options(),
2010+
})
2011+
};
2012+
2013+
let id = String::from("histogram_same_values");
2014+
let req = make_histogram_req(id.clone(), 0, ColumnHistogramParamsMethod::Fixed, 5, None);
2015+
2016+
expect_column_profile_results(&socket, req, |profiles| {
2017+
let histogram = profiles[0].small_histogram.clone().unwrap();
2018+
2019+
// When all values are the same, we should get a single bin with count = number of values
2020+
assert_eq!(histogram.bin_counts, vec![10]);
2021+
2022+
// The bin edges should be [5, 5] since all values are 5
2023+
let expected_edges = r_task(|| {
2024+
format_string(
2025+
harp::parse_eval_global("c(5, 5)").unwrap().sexp,
2026+
&default_format_options(),
2027+
)
2028+
});
2029+
assert_eq!(histogram.bin_edges, expected_edges);
2030+
assert_eq!(histogram.quantiles, vec![]);
2031+
});
2032+
}
2033+
19892034
#[test]
19902035
fn test_frequency_table() {
19912036
let _lock = r_test_lock();

0 commit comments

Comments
 (0)