From 98abf1cedca38b9ea354cad3ff19e8f6ea72e507 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 30 Aug 2024 12:25:36 -0400 Subject: [PATCH 01/16] Update runtests.jl --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 0b299eb1..de6f2ebb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,4 +22,5 @@ test_df = DataFrame( @testset "TidierData" verbose = true begin include("test_pivots.jl") + include("test_mutate.jl") end From 7191f32a09855aaddef5c0bf715839772ac94c16 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 30 Aug 2024 12:25:52 -0400 Subject: [PATCH 02/16] add initial tests --- test/test_mutate.jl | 65 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 test/test_mutate.jl diff --git a/test/test_mutate.jl b/test/test_mutate.jl new file mode 100644 index 00000000..243c94e8 --- /dev/null +++ b/test/test_mutate.jl @@ -0,0 +1,65 @@ +@testset "mutate" verbose = true begin + + @testset "empty mutate returns input" begin + df = DataFrame(x = 1) + gf = @group_by(df, x) + + @test isequal(@mutate(df), df) + @test isequal(@mutate(gf), gf) + + + @test isequal(@mutate(df, []), df) + @test isequal(@mutate(gf, []), gf) + end + + @testset "mutations applied progressively" begin + df = DataFrame(x = 1) + @test isequal( + (@mutate df begin + y = x + 1 + z = y + 1 + end + ), + DataFrame(x = 1, y = 2, z = 3) + ) + + @test isequal( + (@mutate df begin + x = x + 1 + x = x + 1 + end + ), + DataFrame(x = 3) + ) + + @test isequal( + (@mutate df begin + x = 2 + y = x + end + ), + DataFrame(x = 2, y = 2) + ) + + df = DataFrame(x = 1, y = 2) + @test isequal( + (@mutate df begin + x2 = x + x3 = x2 + 1 + end + ), + (@mutate df begin + x2 = x + 0 + x3 = x2 + 1 + end + ) + ) + end + + @testset " length-1 vectors are recycled" begin + df = DataFrame(x = 1:4) + @test isequal(@mutate(df, y = 1)[!, :y], fill(1, 4)) + @test_throws "ArgumentError: New columns must have the same length as old columns" @mutate(df, y = 1:2) + end + +end From 7fd39651884d341705e6d23b3330ca881c05a69a Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 30 Aug 2024 12:39:21 -0400 Subject: [PATCH 03/16] add variable remove test --- test/test_mutate.jl | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 243c94e8..9c6f3809 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -62,4 +62,23 @@ @test_throws "ArgumentError: New columns must have the same length as old columns" @mutate(df, y = 1:2) end + @testset "can remove variables with nothing" begin + df = DataFrame(x = 1:3, y = 1:3) + + @test isequal(@mutate(df, y = nothing), df[:, [1]]) + @test isequal(@ungroup(@mutate(gf, y = nothing)), gf[:, [1]]) + + # even if it doesn't exist + @test isequal(@mutate(df, z = nothing), df[:, [1]]) + + # or was just created + @test isequal( + (@mutate df begin + z = 1, + z = nothing + end + ), + df + ) + end end From 24813e106362760da627fbc0e14c87bc8285f76a Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 30 Aug 2024 13:01:54 -0400 Subject: [PATCH 04/16] add constant, empty df tests --- test/test_mutate.jl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 9c6f3809..5aa2f8e7 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -81,4 +81,34 @@ df ) end + + @testset "mutate supports constants" begin + df = DataFrame(x = 1:10, g = repeat(1:2, inner = 5)) + y = 1:10 + z = 1:5 + + @test isequal(@mutate(df, y = !!y)[!, :y], y) + @test isequal( + (@chain df begin + @group_by(g) + @mutate y = !!y + @ungroup + @pull y + end + ) + ) + end + + @testset "mutate works on empty dataframes" begin + df = DataFrame() + res = @mutate(df) + @test isequal(nrow(res), 0) + @test isequal(ncol(res), 0) + + res = @mutate(df, x = Int64[]) + @test isequal(names(res), ["x"]) + @test isequal(nrow(res), 0) + @test isequal(ncol(res), 0) + end + end From 232c5aebe0b7bd90c90d590554e67492e9e05416 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 30 Aug 2024 13:05:14 -0400 Subject: [PATCH 05/16] syntax error --- test/test_mutate.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 5aa2f8e7..1a6c0594 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -56,7 +56,7 @@ ) end - @testset " length-1 vectors are recycled" begin + @testset "length-1 vectors are recycled" begin df = DataFrame(x = 1:4) @test isequal(@mutate(df, y = 1)[!, :y], fill(1, 4)) @test_throws "ArgumentError: New columns must have the same length as old columns" @mutate(df, y = 1:2) @@ -74,7 +74,7 @@ # or was just created @test isequal( (@mutate df begin - z = 1, + z = 1 z = nothing end ), From d2eaadd4c4e5c43aa410b229964da278fb41c4a8 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 11:01:51 -0400 Subject: [PATCH 06/16] should have one column actually --- test/test_mutate.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 1a6c0594..efc7d75b 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -108,7 +108,7 @@ res = @mutate(df, x = Int64[]) @test isequal(names(res), ["x"]) @test isequal(nrow(res), 0) - @test isequal(ncol(res), 0) + @test isequal(ncol(res), 1) end end From 052e34737d66866dda044e3b38448b56893b1000 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 11:09:11 -0400 Subject: [PATCH 07/16] comment this one out for now --- test/test_mutate.jl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index efc7d75b..2f9e25bc 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -88,15 +88,19 @@ z = 1:5 @test isequal(@mutate(df, y = !!y)[!, :y], y) - @test isequal( - (@chain df begin - @group_by(g) - @mutate y = !!y - @ungroup - @pull y - end - ) - ) + #= + i'm not actually sure whether this test would be one-to-one with Julia, + given that grouped dataframes behave differently than in R. + =# + # @test isequal( + # (@chain df begin + # @group_by(g) + # @mutate y = !!y + # @ungroup + # @pull y + # end + # ) + # ) end @testset "mutate works on empty dataframes" begin From 3c274315110daacb30cfc0aa913b17e3ec689971 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:33:19 -0400 Subject: [PATCH 08/16] remove sequential tests This isn't one-to-one with dplyr because of how dataframes works under the hood --- test/test_mutate.jl | 44 -------------------------------------------- 1 file changed, 44 deletions(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 2f9e25bc..feaf4fbe 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -12,50 +12,6 @@ @test isequal(@mutate(gf, []), gf) end - @testset "mutations applied progressively" begin - df = DataFrame(x = 1) - @test isequal( - (@mutate df begin - y = x + 1 - z = y + 1 - end - ), - DataFrame(x = 1, y = 2, z = 3) - ) - - @test isequal( - (@mutate df begin - x = x + 1 - x = x + 1 - end - ), - DataFrame(x = 3) - ) - - @test isequal( - (@mutate df begin - x = 2 - y = x - end - ), - DataFrame(x = 2, y = 2) - ) - - df = DataFrame(x = 1, y = 2) - @test isequal( - (@mutate df begin - x2 = x - x3 = x2 + 1 - end - ), - (@mutate df begin - x2 = x + 0 - x3 = x2 + 1 - end - ) - ) - end - @testset "length-1 vectors are recycled" begin df = DataFrame(x = 1:4) @test isequal(@mutate(df, y = 1)[!, :y], fill(1, 4)) From b0e7acd28613f016031fc0f435d6bad8cfb9537d Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:44:30 -0400 Subject: [PATCH 09/16] add additional example this example clarifies that progressive transformations require separate calls to mutate --- src/docstrings.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/docstrings.jl b/src/docstrings.jl index 7a08f6bc..2382eda0 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -531,6 +531,21 @@ julia> @chain df begin 3 │ c 3 13 1 11 4 │ d 4 14 1 11 5 │ e 5 15 1 11 +Note that unlike dplyr, @mutate transformations cannot be applied progressively-meaning you cannot reuse variables within the same @mutate call. This requires the use of separate @mutate calls. + +julia> @chain df begin + @mutate b2 = b * 2 + @mutate b3 = b2 * 2 + end +5×5 DataFrame + Row │ a b c b2 b3 + │ Char Int64 Int64 Int64 Int64 +─────┼────────────────────────────────── + 1 │ a 1 11 2 4 + 2 │ b 2 12 4 8 + 3 │ c 3 13 6 12 + 4 │ d 4 14 8 16 + 5 │ e 5 15 10 20 ``` """ From 68d9a17751e598a950cc4933dde18dc367577a55 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:46:23 -0400 Subject: [PATCH 10/16] Revert "add additional example" This reverts commit b0e7acd28613f016031fc0f435d6bad8cfb9537d. --- src/docstrings.jl | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/docstrings.jl b/src/docstrings.jl index 2382eda0..7a08f6bc 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -531,21 +531,6 @@ julia> @chain df begin 3 │ c 3 13 1 11 4 │ d 4 14 1 11 5 │ e 5 15 1 11 -Note that unlike dplyr, @mutate transformations cannot be applied progressively-meaning you cannot reuse variables within the same @mutate call. This requires the use of separate @mutate calls. - -julia> @chain df begin - @mutate b2 = b * 2 - @mutate b3 = b2 * 2 - end -5×5 DataFrame - Row │ a b c b2 b3 - │ Char Int64 Int64 Int64 Int64 -─────┼────────────────────────────────── - 1 │ a 1 11 2 4 - 2 │ b 2 12 4 8 - 3 │ c 3 13 6 12 - 4 │ d 4 14 8 16 - 5 │ e 5 15 10 20 ``` """ From a2f1ad3b371e3bca005e435f9cfe3b6fae7d4b67 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:48:34 -0400 Subject: [PATCH 11/16] add docs about this behavior --- src/docstrings.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/docstrings.jl b/src/docstrings.jl index 7a08f6bc..6b82d7a4 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -532,6 +532,22 @@ julia> @chain df begin 4 │ d 4 14 1 11 5 │ e 5 15 1 11 ``` +Note that unlike dplyr, @mutate transformations cannot be applied progressively-meaning you cannot reuse variables within the same @mutate call. This requires the use of separate @mutate calls. +``` +julia> @chain df begin + @mutate b2 = b * 2 + @mutate b3 = b2 * 2 + end +5×5 DataFrame + Row │ a b c b2 b3 + │ Char Int64 Int64 Int64 Int64 +─────┼────────────────────────────────── + 1 │ a 1 11 2 4 + 2 │ b 2 12 4 8 + 3 │ c 3 13 6 12 + 4 │ d 4 14 8 16 + 5 │ e 5 15 10 20 +``` """ const docstring_summarize = From 80810606249d50df72dd196d2989a388d6f9d37e Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:53:16 -0400 Subject: [PATCH 12/16] clarify docstrings --- src/docstrings.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/docstrings.jl b/src/docstrings.jl index 6b82d7a4..bea5df12 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -535,6 +535,15 @@ julia> @chain df begin Note that unlike dplyr, @mutate transformations cannot be applied progressively-meaning you cannot reuse variables within the same @mutate call. This requires the use of separate @mutate calls. ``` julia> @chain df begin + #= + it's tempting to do this: + @mutate begin + b2 = b * 2 + b3 = b2 * 2 + end + but this syntactic sugar isn't supported. + use separate @mutate calls instead. + =# @mutate b2 = b * 2 @mutate b3 = b2 * 2 end From ca4b623ebc4677660bc29b4f5f9fac4dca5bb09c Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 13:57:35 -0400 Subject: [PATCH 13/16] remove this test --- test/test_mutate.jl | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index feaf4fbe..c83c34ac 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -44,19 +44,6 @@ z = 1:5 @test isequal(@mutate(df, y = !!y)[!, :y], y) - #= - i'm not actually sure whether this test would be one-to-one with Julia, - given that grouped dataframes behave differently than in R. - =# - # @test isequal( - # (@chain df begin - # @group_by(g) - # @mutate y = !!y - # @ungroup - # @pull y - # end - # ) - # ) end @testset "mutate works on empty dataframes" begin From 521447116ac4eda0c5aabc24153a23e1a6c4e712 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Sat, 31 Aug 2024 14:05:07 -0400 Subject: [PATCH 14/16] formatting --- src/docstrings.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/docstrings.jl b/src/docstrings.jl index bea5df12..28b115e3 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -537,10 +537,12 @@ Note that unlike dplyr, @mutate transformations cannot be applied progressively- julia> @chain df begin #= it's tempting to do this: + @mutate begin b2 = b * 2 b3 = b2 * 2 end + but this syntactic sugar isn't supported. use separate @mutate calls instead. =# From 569a2483ab03575f63590c7fcc4a86cd3a6553b2 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Tue, 3 Sep 2024 21:51:23 -0400 Subject: [PATCH 15/16] match the naming of slice tests --- test/test_mutate.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index c83c34ac..5fe8cf78 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -1,4 +1,4 @@ -@testset "mutate" verbose = true begin +@testset "@mutate()" verbose = true begin @testset "empty mutate returns input" begin df = DataFrame(x = 1) From 1a6bae9f80fef67f3955d74e3cfcc09bd18de6d7 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Mon, 16 Sep 2024 10:37:37 -0400 Subject: [PATCH 16/16] drop nothing test --- test/test_mutate.jl | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/test/test_mutate.jl b/test/test_mutate.jl index 5fe8cf78..1f536a90 100644 --- a/test/test_mutate.jl +++ b/test/test_mutate.jl @@ -18,26 +18,6 @@ @test_throws "ArgumentError: New columns must have the same length as old columns" @mutate(df, y = 1:2) end - @testset "can remove variables with nothing" begin - df = DataFrame(x = 1:3, y = 1:3) - - @test isequal(@mutate(df, y = nothing), df[:, [1]]) - @test isequal(@ungroup(@mutate(gf, y = nothing)), gf[:, [1]]) - - # even if it doesn't exist - @test isequal(@mutate(df, z = nothing), df[:, [1]]) - - # or was just created - @test isequal( - (@mutate df begin - z = 1 - z = nothing - end - ), - df - ) - end - @testset "mutate supports constants" begin df = DataFrame(x = 1:10, g = repeat(1:2, inner = 5)) y = 1:10