Skip to content

Commit 82516e3

Browse files
committed
Adds support for tuples and vectors as arguments to select multiple columns. Prefixing tuples/vectors with a - or ! will exclude the selected columns. Updated docs to use Cols() for column interpolation.
1 parent 0b346e5 commit 82516e3

File tree

7 files changed

+91
-12
lines changed

7 files changed

+91
-12
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# TidierData.jl updates
22

3+
## v0.16.1 - 2024-06-09
4+
- Adds support for tuples and vectors as arguments to select multiple columns. Prefixing tuples/vectors with a `-` or `!` will exclude the selected columns.
5+
- The `:` selector from Julia is now available and equivalent to `everything()`
6+
- `@pivot_longer()` now pivots all columns if no column selectors are provided
7+
38
## v0.16.0 - 2024-06-07
49
- `unique()`, `mad()`, and `iqr()` are no longer auto-vectorized
510
- Bugfix: `@ungroup()` now preserves row-ordering (and is faster)

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "TidierData"
22
uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80"
33
authors = ["Karandeep Singh"]
4-
version = "0.16.0"
4+
version = "0.16.1"
55

66
[deps]
77
Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"

docs/examples/UserGuide/interpolation.jl

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Note: You can only interpolate values from variables in the parent environment. If you would like to interpolate column names, you have two options: you can either use `across()` or you can use `@aside` with `@pull()` to create variables in the parent environment containing the values of those columns which can then be accessed using interpolation.
66

7-
# myvar = :b`, `myvar = (:a, :b)`, and `myvar = [:a, :b]` all refer to *columns* with those names. On the other hand, `myvar = "b"`, `myvar = ("a", "b")` and `myvar = ["a", "b"]` will interpolate those *values*. See below for examples.
7+
# `myvar = :b` and `myvar = Cols(:a, :b)` both refer to *columns* with those names. On the other hand, `myvar = "b"`, `myvar = ("a", "b")` and `myvar = ["a", "b"]` will interpolate the *values*. If you intend to interpolate column names, the preferred way is to use `Cols()` as in the examples below.
88

99
using TidierData
1010

@@ -20,9 +20,19 @@ myvar = :b
2020
@select(!!myvar)
2121
end
2222

23-
# ## Select multiple variables (vector of symbols)
23+
# ## Select multiple variables
2424

25-
myvars = [:a, :b]
25+
# You can also use a vector as in `[:a, :b]`, but `Cols()` is preferred because it lets you mix and match numbers.
26+
27+
myvars = Cols(:a, :b)
28+
29+
@chain df begin
30+
@select(!!myvars)
31+
end
32+
33+
# This is the same as this...
34+
35+
myvars = Cols(:a, 2)
2636

2737
@chain df begin
2838
@select(!!myvars)
@@ -86,7 +96,7 @@ end
8696

8797
# ## Summarize across multiple variables
8898

89-
myvars = [:b, :c]
99+
myvars = Cols(:b, :c)
90100

91101
@chain df begin
92102
@summarize(across(!!myvars, (mean, minimum, maximum)))
@@ -103,7 +113,9 @@ end
103113

104114
# ## Group by multiple interpolated variables
105115

106-
myvars = [:a, :b]
116+
# Once again, you can mix and match column selectors within `Cols()`
117+
118+
myvars = Cols(:a, 2)
107119

108120
@chain df begin
109121
@group_by(!!myvars)

src/TidierData.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ macro group_by(df, exprs...)
449449

450450
tidy_exprs = parse_tidy.(tidy_exprs)
451451
grouping_exprs = parse_group_by.(exprs)
452+
grouping_exprs = parse_tidy.(grouping_exprs)
452453

453454
df_expr = quote
454455
local any_expressions = any(typeof.($tidy_exprs) .!= QuoteNode)

src/docstrings.jl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,28 @@ julia> @chain df @select(!(a:b))
260260
4 │ 14
261261
5 │ 15
262262
263+
julia> @chain df @select(-(a, b))
264+
5×1 DataFrame
265+
Row │ c
266+
│ Int64
267+
─────┼───────
268+
1 │ 11
269+
2 │ 12
270+
3 │ 13
271+
4 │ 14
272+
5 │ 15
273+
274+
julia> @chain df @select(!(a, b))
275+
5×1 DataFrame
276+
Row │ c
277+
│ Int64
278+
─────┼───────
279+
1 │ 11
280+
2 │ 12
281+
3 │ 13
282+
4 │ 14
283+
5 │ 15
284+
263285
julia> @chain df begin
264286
@select(contains("b"), starts_with("c"))
265287
end
@@ -667,6 +689,34 @@ julia> @chain df begin
667689
3 │ C 3.0
668690
4 │ D 4.0
669691
5 │ E 5.0
692+
693+
julia> @chain df begin
694+
@group_by(-(b, c)) # same as `a`
695+
@summarize(b = mean(b))
696+
end
697+
5×2 DataFrame
698+
Row │ a b
699+
│ Char Float64
700+
─────┼───────────────
701+
1 │ a 1.0
702+
2 │ b 2.0
703+
3 │ c 3.0
704+
4 │ d 4.0
705+
5 │ e 5.0
706+
707+
julia> @chain df begin
708+
@group_by(!(b, c)) # same as `a`
709+
@summarize(b = mean(b))
710+
end
711+
5×2 DataFrame
712+
Row │ a b
713+
│ Char Float64
714+
─────┼───────────────
715+
1 │ a 1.0
716+
2 │ b 2.0
717+
3 │ c 3.0
718+
4 │ d 4.0
719+
5 │ e 5.0
670720
```
671721
"""
672722

src/parsing.jl

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Not exported
2-
function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or expression
2+
function parse_tidy(tidy_expr::Union{Expr,Symbol,Number, QuoteNode}; # Can be symbol or expression
33
autovec::Bool=true, subset::Bool=false, from_across::Bool=false,
44
from_slice::Bool = false)
55
if @capture(tidy_expr, across(vars_, funcs_))
@@ -32,10 +32,6 @@ function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or exp
3232
endindex = QuoteNode(endindex)
3333
end
3434
return :(Between($startindex, $endindex))
35-
elseif @capture(tidy_expr, names_vect)
36-
return Symbol.(names.args)
37-
elseif @capture(tidy_expr, -names_vect)
38-
return Not(Symbol.(names.args))
3935
elseif @capture(tidy_expr, (lhs_ = fn_(args__)) | (lhs_ = fn_.(args__)))
4036
if length(args) == 0
4137
lhs = QuoteNode(lhs)
@@ -75,13 +71,28 @@ function parse_tidy(tidy_expr::Union{Expr,Symbol,Number}; # Can be symbol or exp
7571
end
7672
elseif @capture(tidy_expr, !var_Number)
7773
return :(Not($var))
74+
elseif @capture(tidy_expr, (tuple__,))
75+
tuple = parse_tidy.(tuple)
76+
return :(Cols($(tuple...)))
77+
elseif @capture(tidy_expr, [vec__])
78+
vec = parse_tidy.(vec)
79+
return :(Cols($(vec...)))
80+
elseif @capture(tidy_expr, -[vec__])
81+
vec = parse_tidy.(vec)
82+
return :(Not(Cols($(vec...)))) # can simplify to Not($(tuple...)) in DataFrames 1.6+
83+
elseif @capture(tidy_expr, ![vec__])
84+
vec = parse_tidy.(vec)
85+
return :(Not(Cols($(vec...)))) # can simplify to Not($(tuple...)) in DataFrames 1.6+
7886
elseif !subset & @capture(tidy_expr, -fn_(args__)) # negated selection helpers
7987
return :(Cols(!($(esc(fn))($(args...))))) # change the `-` to a `!` and return
8088
elseif !subset & @capture(tidy_expr, fn_(args__)) # selection helpers
8189
if from_across || fn == :Cols # fn == :Cols is to deal with interpolated columns
8290
return tidy_expr
8391
elseif fn == :where
8492
return :(Cols(all.(broadcast($(esc(args...)), eachcol(DataFrame(df_copy))))))
93+
elseif fn == :- || fn == :! # for negated selection as in -(A, B), which is internally represented as a function call
94+
args = parse_tidy.(args)
95+
return :(Not(Cols($(args...)))) # can simplify to Not($(tuple...)) in DataFrames 1.6+
8596
else
8697
return :(Cols($(esc(tidy_expr))))
8798
end

src/pivots.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ $docstring_pivot_longer
4242
"""
4343
macro pivot_longer(df, exprs...)
4444
if length(exprs) == 0
45-
exprs = (:(:),)
45+
exprs = (:(everything()),)
4646
end
4747
exprs = parse_blocks(exprs...)
4848

0 commit comments

Comments
 (0)