Skip to content

Commit 0b346e5

Browse files
committed
Merge branch 'main' of https://github.com/TidierOrg/TidierData.jl into pr/cnrrobertson/104
2 parents 8b1c05e + c9bc480 commit 0b346e5

File tree

7 files changed

+29
-12
lines changed

7 files changed

+29
-12
lines changed

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# TidierData.jl updates
22

3+
## v0.16.0 - 2024-06-07
4+
- `unique()`, `mad()`, and `iqr()` are no longer auto-vectorized
5+
- Bugfix: `@ungroup()` now preserves row-ordering (and is faster)
6+
- Bugfix: `@slice_sample()` now throws an error if neither an `n` nor a `prop` keyword argument is provided
7+
- Bump minimum Julia version to 1.9
8+
39
## v0.15.2 - 2024-04-19
410
- Update Chain.jl dependency version
511

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TidierData"
22
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
33
authors = ["Karandeep Singh"]
4-
version = "0.15.2"
4+
version = "0.16.0"
55

66
[deps]
77
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
@@ -22,7 +22,7 @@ Reexport = "0.2, 1"
2222
ShiftedArrays = "2"
2323
Statistics = "1.6"
2424
StatsBase = "0.34, 1"
25-
julia = "1.6"
25+
julia = "1.9"
2626

2727
[extras]
2828
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

docs/examples/UserGuide/conditionals.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ end
3434

3535
# Although `if_else()` is convenient when evaluating a single condition, it can be cumbersome when evaluating multiple conditions because subsequent conditions need to be nested within the `no` condition for the preceding argument. For situations where multiple conditions need to be evaluated, `case_when()` is more convenient.
3636

37-
# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value if 3 if `a >= 3` and otherwise leaves the value unchanged.
37+
# Let's first consider a similar example from above and recreate it using `case_when()`. The following code creates a column `b` that assigns a value of 3 if `a >= 3` and otherwise leaves the value unchanged.
3838

3939
@chain df begin
4040
@mutate(b = case_when(a >= 3 => 3,
@@ -72,4 +72,4 @@ end
7272

7373
# ## Do these functions work outside of TidierData.jl?
7474

75-
# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized.
75+
# Yes, both `if_else()` and `case_when()` work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of `case_when()`, the `=>` will need to be written as `.=>`. This is not needed when using these functions inside of TidierData.jl because they are auto-vectorized there.

docs/examples/UserGuide/slice.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ end
6464
# ## Sample 5 random rows in the data frame
6565

6666
@chain df begin
67-
@slice_sample(5)
67+
@slice_sample(n = 5)
6868
end
6969

7070
# ## Slice the min
@@ -99,4 +99,4 @@ end
9999

100100
@chain df begin
101101
@slice_head(n = 3)
102-
end
102+
end

src/TidierData.jl

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ const code = Ref{Bool}(false) # output DataFrames.jl code?
2828
const log = Ref{Bool}(false) # output tidylog output? (not yet implemented)
2929

3030
# The global do-not-vectorize "list"
31-
const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical])
31+
const not_vectorized = Ref{Vector{Symbol}}([:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr])
3232

3333
# The global do-not-escape "list"
3434
# `in`, `∈`, and `∉` should be vectorized in auto-vec but not escaped
@@ -494,7 +494,17 @@ end
494494
$docstring_ungroup
495495
"""
496496
macro ungroup(df)
497-
:(DataFrame($(esc(df))))
497+
df_expr = quote
498+
if $(esc(df)) isa GroupedDataFrame
499+
transform($(esc(df)); ungroup = true)
500+
else
501+
copy($(esc(df)))
502+
end
503+
end
504+
if code[]
505+
@info MacroTools.prettify(df_expr)
506+
end
507+
return df_expr
498508
end
499509

500510
"""
@@ -542,7 +552,7 @@ macro distinct(df, exprs...)
542552
# because if the original DataFrame is grouped, it must be ungrouped
543553
# and then regrouped, so there's no need to make a copy up front.
544554
# This is because `unique()` does not work on `GroupedDataFrame`s.
545-
local df_copy = DataFrame($(esc(df)))
555+
local df_copy = transform($(esc(df)); ungroup = true)
546556
if $any_found_n
547557
transform!(df_copy, nrow => :TidierData_n)
548558
end

src/docstrings.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,14 +1320,15 @@ julia> @semi_join(df1, df2, "a" = "a")
13201320

13211321
const docstring_pivot_wider =
13221322
"""
1323-
@pivot_wider(df, names_from, values_from)
1323+
@pivot_wider(df, names_from, values_from[, values_fill])
13241324
13251325
Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows.
13261326
13271327
# Arguments
13281328
- `df`: A DataFrame.
13291329
- `names_from`: The name of the column to get the name of the output columns from.
13301330
- `values_from`: The name of the column to get the cell values from.
1331+
- `values_fill`: The value to use in place of a missing name/value combination (default is `missing`).
13311332
13321333
# Examples
13331334
```jldoctest
@@ -3409,4 +3410,4 @@ julia> @relocate(df, B:C) # bring columns to the front
34093410
4 │ 9 D 4 B 4 D
34103411
5 │ 10 E 5 C 5 E
34113412
```
3412-
"""
3413+
"""

src/slice.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ macro slice_sample(df, exprs...)
6464
as_integer(floor(n() * $expr_dict[:prop]));
6565
replace=$replace))
6666
else
67-
@slice($(esc(df)), sample(1:n(), 1; replace=$replace))
67+
throw("Please provide either an `n` or a `prop` value as a keyword argument.")
6868
end
6969
end
7070

0 commit comments

Comments
 (0)