Skip to content

Commit 1a5da8a

Browse files
authored
add cols to mapcols and mapcols! (#3386)
1 parent 87c2162 commit 1a5da8a

File tree

3 files changed

+90
-17
lines changed

3 files changed

+90
-17
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
column names only to a subset of the columns specified by the `cols`
1010
keyword argument
1111
([#3380](https://github.com/JuliaData/DataFrames.jl/pull/3380))
12+
* `mapcols` and `mapcols!` now allow applying a function transforming
13+
columns only to a subset of the columns specified by the `cols`
14+
keyword argument
15+
([#3386](https://github.com/JuliaData/DataFrames.jl/pull/3386))
1216

1317
## Bug fixes
1418

src/abstractdataframe/iteration.jl

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,20 +107,20 @@ as a `DataFrameRows` over a view of rows of parent of `dfr`.
107107
julia> collect(Iterators.partition(eachrow(DataFrame(x=1:5)), 2))
108108
3-element Vector{DataFrames.DataFrameRows{SubDataFrame{DataFrame, DataFrames.Index, UnitRange{Int64}}}}:
109109
2×1 DataFrameRows
110-
Row │ x
111-
│ Int64
110+
Row │ x
111+
│ Int64
112112
─────┼───────
113113
1 │ 1
114114
2 │ 2
115115
2×1 DataFrameRows
116-
Row │ x
117-
│ Int64
116+
Row │ x
117+
│ Int64
118118
─────┼───────
119119
1 │ 3
120120
2 │ 4
121121
1×1 DataFrameRows
122-
Row │ x
123-
│ Int64
122+
Row │ x
123+
│ Int64
124124
─────┼───────
125125
1 │ 5
126126
```
@@ -408,12 +408,17 @@ Base.show(dfcs::DataFrameColumns;
408408
summary=summary, eltypes=eltypes, truncate=truncate, kwargs...)
409409

410410
"""
411-
mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
411+
mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())
412+
413+
Return a `DataFrame` where each column of `df` selected by `cols` (by default, all columns)
414+
is transformed using function `f`.
415+
Columns not selected by `cols` are copied.
412416
413-
Return a `DataFrame` where each column of `df` is transformed using function `f`.
414417
`f` must return `AbstractVector` objects all with the same length or scalars
415418
(all values other than `AbstractVector` are considered to be a scalar).
416419
420+
The `cols` column selector can be any value accepted as a column selector by the `names` function.
421+
417422
Note that `mapcols` guarantees not to reuse the columns from `df` in the returned
418423
`DataFrame`. If `f` returns its argument then it gets copied before being stored.
419424
@@ -440,15 +445,32 @@ julia> mapcols(x -> x.^2, df)
440445
2 │ 4 144
441446
3 │ 9 169
442447
4 │ 16 196
448+
449+
julia> mapcols(x -> x.^2, df, cols=r"y")
450+
4×2 DataFrame
451+
Row │ x y
452+
│ Int64 Int64
453+
─────┼──────────────
454+
1 │ 1 121
455+
2 │ 2 144
456+
3 │ 3 169
457+
4 │ 4 196
443458
```
444459
"""
445-
function mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
460+
function mapcols(f::Union{Function, Type}, df::AbstractDataFrame; cols=All())
461+
if cols === All() || cols === Colon()
462+
apply = Iterators.repeated(true)
463+
else
464+
picked = Set(names(df, cols))
465+
apply = Bool[name in picked for name in names(df)]
466+
end
467+
446468
# note: `f` must return a consistent length
447469
vs = AbstractVector[]
448470
seenscalar = false
449471
seenvector = false
450-
for v in eachcol(df)
451-
fv = f(v)
472+
for (v, doapply) in zip(eachcol(df), apply)
473+
fv = doapply ? f(v) : copy(v)
452474
if fv isa AbstractVector
453475
if seenscalar
454476
throw(ArgumentError("mixing scalars and vectors in mapcols not allowed"))
@@ -470,9 +492,12 @@ function mapcols(f::Union{Function, Type}, df::AbstractDataFrame)
470492
end
471493

472494
"""
473-
mapcols!(f::Union{Function, Type}, df::DataFrame)
495+
mapcols!(f::Union{Function, Type}, df::DataFrame; cols=All())
496+
497+
Update a `DataFrame` in-place where each column of `df` selected by `cols` (by default, all columns)
498+
is transformed using function `f`.
499+
Columns not selected by `cols` are left unchanged.
474500
475-
Update a `DataFrame` in-place where each column of `df` is transformed using function `f`.
476501
`f` must return `AbstractVector` objects all with the same length or scalars
477502
(all values other than `AbstractVector` are considered to be a scalar).
478503
@@ -503,20 +528,39 @@ julia> df
503528
2 │ 4 144
504529
3 │ 9 169
505530
4 │ 16 196
531+
532+
julia> mapcols!(x -> 2 * x, df, cols=r"x");
533+
534+
julia> df
535+
4×2 DataFrame
536+
Row │ x y
537+
│ Int64 Int64
538+
─────┼──────────────
539+
1 │ 2 121
540+
2 │ 8 144
541+
3 │ 18 169
542+
4 │ 32 196
506543
```
507544
"""
508-
function mapcols!(f::Union{Function, Type}, df::DataFrame)
509-
# note: `f` must return a consistent length
545+
function mapcols!(f::Union{Function,Type}, df::DataFrame; cols=All())
510546
if ncol(df) == 0 # skip if no columns
511547
_drop_all_nonnote_metadata!(df)
512548
return df
513549
end
514550

551+
if cols === All() || cols === Colon()
552+
apply = Iterators.repeated(true)
553+
else
554+
picked = Set(names(df, cols))
555+
apply = Bool[name in picked for name in names(df)]
556+
end
557+
558+
# note: `f` must return a consistent length
515559
vs = AbstractVector[]
516560
seenscalar = false
517561
seenvector = false
518-
for v in eachcol(df)
519-
fv = f(v)
562+
for (v, doapply) in zip(eachcol(df), apply)
563+
fv = doapply ? f(v) : v
520564
if fv isa AbstractVector
521565
if seenscalar
522566
throw(ArgumentError("mixing scalars and vectors in mapcols not allowed"))

test/iteration.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,19 @@ end
7878
df = mapcols(x -> 2:2, df)
7979
@test df == DataFrame(a=2)
8080
@test df.a isa Vector{Int}
81+
82+
df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
83+
@test mapcols(x -> 2x, df, cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4])
84+
@test mapcols(x -> 2x, df, cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
85+
@test mapcols(x -> 2x, df, cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
86+
@test mapcols(x -> 2x, df, cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
87+
@test mapcols(x -> 2x, df, cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
88+
@test mapcols(x -> 2x, df, cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
89+
90+
df2 = mapcols(x -> 2x, df, cols="b")
91+
@test df2.a1 == df.a1 && df2.a1 !== df.a1
92+
@test df2.a2 == df.a2 && df2.a2 !== df.a2
93+
@test df2.b == 2*df.b
8194
end
8295

8396
@testset "mapcols!" begin
@@ -109,6 +122,18 @@ end
109122
mapcols!(x -> 2:2, df)
110123
@test df == DataFrame(a=2)
111124
@test df.a isa Vector{Int}
125+
126+
df = DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
127+
@test mapcols!(x -> 2x, copy(df), cols=r"a") == DataFrame(a1=[2, 4], a2=[4, 6], b=[3, 4])
128+
@test mapcols!(x -> 2x, copy(df), cols="b") == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
129+
@test mapcols!(x -> 2x, copy(df), cols=Not(r"a")) == DataFrame(a1=[1, 2], a2=[2, 3], b=[6, 8])
130+
@test mapcols!(x -> 2x, copy(df), cols=Int) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
131+
@test mapcols!(x -> 2x, copy(df), cols=Not(All())) == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
132+
@test mapcols!(x -> 2x, copy(df), cols=:) == DataFrame(a1=[2, 4], a2=[4, 6], b=[6, 8])
133+
a1, a2, b = eachcol(df)
134+
mapcols!(x -> 2x, df, cols=Not(All()))
135+
@test df == DataFrame(a1=[1, 2], a2=[2, 3], b=[3, 4])
136+
@test df.a1 === a1 && df.a2 === a2 && df.b === b
112137
end
113138

114139
@testset "SubDataFrame" begin

0 commit comments

Comments
 (0)