Skip to content

Commit afdc188

Browse files
drizk1 and kdpsingh
authored
fixes @summary (#124)
* fixes `@summary` * Added support for non-numeric columns, minor tweaks to column names. --------- Co-authored-by: Karandeep Singh <[email protected]>
1 parent 70b35d4 commit afdc188

File tree

4 files changed

+37
-14
lines changed

4 files changed

+37
-14
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# TidierData.jl updates
22

3+
## v0.16.3
4+
- Bugfix: `@summary` no longer errors on non-numeric columns. For non-numeric columns, it now reports only the summary stats that do not require numeric values (the counts of non-missing, missing, total, and unique values). Summary column names are now lowercase snake_case.
5+
36
## v0.16.2 - 2024-09-03
47
- Bugfix: `@slice_min` and `@slice_max` respect the `n` argument
58
- Adds `@head`

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TidierData"
22
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
33
authors = ["Karandeep Singh"]
4-
version = "0.16.2"
4+
version = "0.16.3"
55

66
[deps]
77
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

src/docstrings.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2415,7 +2415,8 @@ For numerical columns, returns a dataframe with the Q1,Q3, min, max, mean, media
24152415
julia> df = DataFrame(a = [1, 2, 3, 4, 5],
24162416
b = [missing, 7, 8, 9, 10],
24172417
c = [11, missing, 13, 14, missing],
2418-
d = [16, 17, 18, 19, 20]);
2418+
d = [16.1, 17.2, 18.3, 19.4, 20.5],
2419+
e = ["a", "a", "a", "a", "a"]);
24192420
24202421
julia> @summary(df);
24212422

src/summary.jl

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,37 @@ function summary_stats(df::DataFrame)
33
summary_data = []
44
for column in colnames
55
col = df[:, column]
6-
col_nonmissing = collect(skipmissing(col))
7-
push!(summary_data, (
8-
Column = column,
9-
Min = minimum(col_nonmissing),
10-
Q1 = quantile(col_nonmissing, 0.25),
11-
Median = median(col_nonmissing),
12-
Mean = mean(col_nonmissing),
13-
Q3 = quantile(col_nonmissing, 0.75),
14-
Max = maximum(col_nonmissing),
15-
Count = length(col_nonmissing),
16-
Missing_Count = count(ismissing, col)
17-
))
6+
if eltype(col) <: Union{Number, Missing}
7+
col_nonmissing = collect(skipmissing(col))
8+
push!(summary_data, (
9+
column = column,
10+
min = minimum(col_nonmissing),
11+
q1 = quantile(col_nonmissing, 0.25),
12+
median = median(col_nonmissing),
13+
mean = mean(col_nonmissing),
14+
q3 = quantile(col_nonmissing, 0.75),
15+
max = maximum(col_nonmissing),
16+
non_missing_values = length(col_nonmissing),
17+
missing_values = count(ismissing, col),
18+
total_values = length(col),
19+
unique_values = length(unique(col_nonmissing))
20+
))
21+
else
22+
col_nonmissing = collect(skipmissing(col))
23+
push!(summary_data, (
24+
column = column,
25+
min = nothing,
26+
q1 = nothing,
27+
median = nothing,
28+
mean = nothing,
29+
q3 = nothing,
30+
max = nothing,
31+
non_missing_values = length(col_nonmissing),
32+
missing_values = count(ismissing, col),
33+
total_values = length(col),
34+
unique_values = length(unique(col_nonmissing))
35+
))
36+
end
1837
end
1938
return DataFrame(summary_data)
2039
end

0 commit comments

Comments
 (0)