Skip to content

Commit 7f1ac79

Browse files
authored
Merge pull request #104 from cnrrobertson/pivot_options
Ability to specify lists, Not lists, colon, or nothing for @pivot_longer
2 parents c9bc480 + 82516e3 commit 7f1ac79

File tree

10 files changed

+194
-10
lines changed

10 files changed

+194
-10
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# TidierData.jl updates
22

3+
## v0.16.1 - 2024-06-09
4+
- Adds support for tuples and vectors as arguments to select multiple columns. Prefixing tuples/vectors with a `-` or `!` will exclude the selected columns.
5+
- The `:` selector from Julia is now available and equivalent to `everything()`
6+
- `@pivot_longer()` now pivots all columns if no column selectors are provided
7+
38
## v0.16.0 - 2024-06-07
49
- `unique()`, `mad()`, and `iqr()` are no longer auto-vectorized
510
- Bugfix: `@ungroup()` now preserves row-ordering (and is faster)

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TidierData"
22
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
33
authors = ["Karandeep Singh"]
4-
version = "0.16.0"
4+
version = "0.16.1"
55

66
[deps]
77
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

docs/examples/UserGuide/interpolation.jl

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Note: You can only interpolate values from variables in the parent environment. If you would like to interpolate column names, you have two options: you can either use `across()` or you can use `@aside` with `@pull()` to create variables in the parent environment containing the values of those columns which can then be accessed using interpolation.
66

7-
# myvar = :b`, `myvar = (:a, :b)`, and `myvar = [:a, :b]` all refer to *columns* with those names. On the other hand, `myvar = "b"`, `myvar = ("a", "b")` and `myvar = ["a", "b"]` will interpolate those *values*. See below for examples.
7+
# `myvar = :b` and `myvar = Cols(:a, :b)` both refer to *columns* with those names. On the other hand, `myvar = "b"`, `myvar = ("a", "b")` and `myvar = ["a", "b"]` will interpolate the *values*. If you intend to interpolate column names, the preferred way is to use `Cols()` as in the examples below.
88

99
using TidierData
1010

@@ -20,9 +20,19 @@ myvar = :b
2020
@select(!!myvar)
2121
end
2222

23-
# ## Select multiple variables (vector of symbols)
23+
# ## Select multiple variables
2424

25-
myvars = [:a, :b]
25+
# You can also use a vector as in `[:a, :b]`, but `Cols()` is preferred because it lets you mix and match numbers.
26+
27+
myvars = Cols(:a, :b)
28+
29+
@chain df begin
30+
@select(!!myvars)
31+
end
32+
33+
# This is the same as this...
34+
35+
myvars = Cols(:a, 2)
2636

2737
@chain df begin
2838
@select(!!myvars)
@@ -86,7 +96,7 @@ end
8696

8797
# ## Summarize across multiple variables
8898

89-
myvars = [:b, :c]
99+
myvars = Cols(:b, :c)
90100

91101
@chain df begin
92102
@summarize(across(!!myvars, (mean, minimum, maximum)))
@@ -103,7 +113,9 @@ end
103113

104114
# ## Group by multiple interpolated variables
105115

106-
myvars = [:a, :b]
116+
# Once again, you can mix and match column selectors within `Cols()`
117+
118+
myvars = Cols(:a, 2)
107119

108120
@chain df begin
109121
@group_by(!!myvars)

docs/examples/UserGuide/pivots.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,18 @@ df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4])
3636

3737
@pivot_longer(df_wide, -id)
3838

39+
# The selected columns can also be included as an array
40+
41+
@pivot_longer(df_wide, [id, B])
42+
43+
# or excluded
44+
45+
@pivot_longer(df_wide, -[id, B])
46+
47+
# If all columns should be included, they can be specified with `everything()`, with `:`, or by leaving the argument blank.
48+
49+
@pivot_longer(df_wide, everything())
50+
3951
# In this example, we set the `names_to` and `values_to` arguments. Either argument can be left out and will revert to the default value. The `names_to` and `values_to` arguments can be provided as strings or as bare unquoted variable names.
4052

4153
# Here is an example with `names_to` and `values_to` containing strings:
@@ -45,3 +57,4 @@ df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4])
4557
# And here is an example with `names_to` and `values_to` containing bare unquoted variables:
4658

4759
@pivot_longer(df_wide, A:B, names_to = letter, values_to = number)
60+

src/TidierData.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ macro group_by(df, exprs...)
449449

450450
tidy_exprs = parse_tidy.(tidy_exprs)
451451
grouping_exprs = parse_group_by.(exprs)
452+
grouping_exprs = parse_tidy.(grouping_exprs)
452453

453454
df_expr = quote
454455
local any_expressions = any(typeof.($tidy_exprs) .!= QuoteNode)

src/docstrings.jl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,28 @@ julia> @chain df @select(!(a:b))
260260
4 │ 14
261261
5 │ 15
262262
263+
julia> @chain df @select(-(a, b))
264+
5×1 DataFrame
265+
Row │ c
266+
│ Int64
267+
─────┼───────
268+
1 │ 11
269+
2 │ 12
270+
3 │ 13
271+
4 │ 14
272+
5 │ 15
273+
274+
julia> @chain df @select(!(a, b))
275+
5×1 DataFrame
276+
Row │ c
277+
│ Int64
278+
─────┼───────
279+
1 │ 11
280+
2 │ 12
281+
3 │ 13
282+
4 │ 14
283+
5 │ 15
284+
263285
julia> @chain df begin
264286
@select(contains("b"), starts_with("c"))
265287
end
@@ -667,6 +689,34 @@ julia> @chain df begin
667689
3 │ C 3.0
668690
4 │ D 4.0
669691
5 │ E 5.0
692+
693+
julia> @chain df begin
694+
@group_by(-(b, c)) # same as `a`
695+
@summarize(b = mean(b))
696+
end
697+
5×2 DataFrame
698+
Row │ a b
699+
│ Char Float64
700+
─────┼───────────────
701+
1 │ a 1.0
702+
2 │ b 2.0
703+
3 │ c 3.0
704+
4 │ d 4.0
705+
5 │ e 5.0
706+
707+
julia> @chain df begin
708+
@group_by(!(b, c)) # same as `a`
709+
@summarize(b = mean(b))
710+
end
711+
5×2 DataFrame
712+
Row │ a b
713+
│ Char Float64
714+
─────┼───────────────
715+
1 │ a 1.0
716+
2 │ b 2.0
717+
3 │ c 3.0
718+
4 │ d 4.0
719+
5 │ e 5.0
670720
```
671721
"""
672722

src/parsing.jl

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Not exported
2-
function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or expression
2+
function parse_tidy(tidy_expr::Union{Expr,Symbol,Number, QuoteNode}; # Can be symbol or expression
33
autovec::Bool=true, subset::Bool=false, from_across::Bool=false,
44
from_slice::Bool = false)
55
if @capture(tidy_expr, across(vars_, funcs_))
@@ -55,7 +55,11 @@ function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or exp
5555
var = QuoteNode(var)
5656
return :(Not($var))
5757
elseif @capture(tidy_expr, var_Symbol)
58-
return QuoteNode(var)
58+
if var == Symbol(":")
59+
return var
60+
else
61+
return QuoteNode(var)
62+
end
5963
elseif @capture(tidy_expr, var_Number)
6064
if var > 0
6165
return var
@@ -67,13 +71,28 @@ function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or exp
6771
end
6872
elseif @capture(tidy_expr, !var_Number)
6973
return :(Not($var))
74+
elseif @capture(tidy_expr, (tuple__,))
75+
tuple = parse_tidy.(tuple)
76+
return :(Cols($(tuple...)))
77+
elseif @capture(tidy_expr, [vec__])
78+
vec = parse_tidy.(vec)
79+
return :(Cols($(vec...)))
80+
elseif @capture(tidy_expr, -[vec__])
81+
vec = parse_tidy.(vec)
82+
return :(Not(Cols($(vec...)))) # can simplify to Not($(vec...)) in DataFrames 1.6+
83+
elseif @capture(tidy_expr, ![vec__])
84+
vec = parse_tidy.(vec)
85+
return :(Not(Cols($(vec...)))) # can simplify to Not($(vec...)) in DataFrames 1.6+
7086
elseif !subset & @capture(tidy_expr, -fn_(args__)) # negated selection helpers
7187
return :(Cols(!($(esc(fn))($(args...))))) # change the `-` to a `!` and return
7288
elseif !subset & @capture(tidy_expr, fn_(args__)) # selection helpers
7389
if from_across || fn == :Cols # fn == :Cols is to deal with interpolated columns
7490
return tidy_expr
7591
elseif fn == :where
7692
return :(Cols(all.(broadcast($(esc(args...)), eachcol(DataFrame(df_copy))))))
93+
elseif fn == :- || fn == :! # for negated selection as in -(A, B), which is internally represented as a function call
94+
args = parse_tidy.(args)
95+
return :(Not(Cols($(args...)))) # can simplify to Not($(args...)) in DataFrames 1.6+
7796
else
7897
return :(Cols($(esc(tidy_expr))))
7998
end
@@ -499,4 +518,4 @@ function parse_blocks(exprs...)
499518
return (MacroTools.rmlines(exprs[1]).args...,)
500519
end
501520
return exprs
502-
end
521+
end

src/pivots.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ end
4141
$docstring_pivot_longer
4242
"""
4343
macro pivot_longer(df, exprs...)
44+
if length(exprs) == 0
45+
exprs = (:(everything()),)
46+
end
4447
exprs = parse_blocks(exprs...)
4548

4649
# take the expressions and return arg => value dictionary

test/runtests.jl

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,18 @@ DocMeta.setdocmeta!(TidierData, :DocTestSetup, :(using TidierData); recursive=tr
88

99
doctest(TidierData)
1010

11-
end
11+
end
12+
13+
using TidierData
14+
using Test
15+
using DataFrames
16+
17+
test_df = DataFrame(
18+
label = [1, 1, 2, 2],
19+
name = ["A", "B", "A", "B"],
20+
num = [1, 2, 3, 4]
21+
)
22+
23+
@testset "TidierData" verbose = true begin
24+
include("test_pivots.jl")
25+
end

test/test_pivots.jl

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
@testset "pivots" verbose = true begin
2+
@testset "pivot_wider" begin
3+
true_wide = DataFrame(
4+
label = [1, 2],
5+
A = [1, 3],
6+
B = [2, 4]
7+
)
8+
test_wide = @pivot_wider(test_df, names_from="name", values_from="num")
9+
test_wide2 = @pivot_wider(test_df, names_from=name, values_from=num)
10+
test_wide3 = @pivot_wider(test_df, names_from=:name, values_from=:num)
11+
@test all(Array(true_wide .== test_wide))
12+
@test all(Array(true_wide .== test_wide2))
13+
@test all(Array(true_wide .== test_wide3))
14+
end
15+
16+
@testset "pivot_longer" begin
17+
true_long1 = DataFrame(
18+
label = [1,1,2,2,1,1,2,2],
19+
variable = ["name","name","name","name","num","num","num","num"],
20+
value = ["A","B","A","B",1,2,3,4],
21+
)
22+
test_long1 = @pivot_longer(test_df, -label)
23+
test_long2 = @pivot_longer(test_df, name:num)
24+
25+
true_long3 = DataFrame(
26+
name = ["A","B","A","B"],
27+
num = [1,2,3,4],
28+
variable = ["label","label","label","label"],
29+
value = [1,1,2,2]
30+
)
31+
test_long3 = @pivot_longer(test_df, -(name:num))
32+
test_long4 = @pivot_longer(test_df, label)
33+
34+
true_long5 = DataFrame(
35+
name = ["A","B","A","B","A","B","A","B"],
36+
variable = ["label","label","label","label","num","num","num","num"],
37+
value = [1,1,2,2,1,2,3,4],
38+
)
39+
test_long5 = @pivot_longer(test_df, [label,num])
40+
41+
true_long6 = DataFrame(
42+
label = [1,1,2,2],
43+
num = [1,2,3,4],
44+
variable = ["name","name","name","name"],
45+
value = ["A","B","A","B"],
46+
)
47+
test_long6 = @pivot_longer(test_df, -[label,num])
48+
49+
true_long7 = DataFrame(
50+
variable = ["label","label","label","label","name","name","name","name","num","num","num","num"],
51+
value = [1,1,2,2,"A","B","A","B",1,2,3,4],
52+
)
53+
test_long7 = @pivot_longer(test_df, :)
54+
test_long8 = @pivot_longer(test_df)
55+
test_long9 = @pivot_longer(test_df, everything())
56+
57+
@test all(Array(true_long1 .== test_long1))
58+
@test all(Array(true_long1 .== test_long2))
59+
@test all(Array(true_long3 .== test_long3))
60+
@test all(Array(true_long3 .== test_long4))
61+
@test all(Array(true_long5 .== test_long5))
62+
@test all(Array(true_long6 .== test_long6))
63+
@test all(Array(true_long7 .== test_long7))
64+
@test all(Array(true_long7 .== test_long8))
65+
@test all(Array(true_long7 .== test_long9))
66+
end
67+
end

0 commit comments

Comments
 (0)