Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1413,6 +1413,22 @@ julia> @pivot_wider(df_long_missing, names_from = variable, values_from = value,
─────┼─────────────────────
1 │ 1 1 2
2 │ 2 0 4

julia> df_mult = DataFrame(
paddockId = [0, 0, 1, 1, 2, 2],
color = repeat([:red, :blue], 3),
count = repeat([3, 4], 3),
weight = [0.2, 0.3, 0.2, 0.3, 0.2, 0.2],
);

julia> @pivot_wider(df_mult, names_from = color, values_from = count:weight)
3×5 DataFrame
Row │ paddockId red_count blue_count red_weight blue_weight
│ Int64 Int64? Int64? Float64? Float64?
─────┼───────────────────────────────────────────────────────────
1 │ 0 3 4 0.2 0.3
2 │ 1 3 4 0.2 0.3
3 │ 2 3 4 0.2 0.2
```
"""

Expand Down Expand Up @@ -3231,7 +3247,7 @@ julia> df = DataFrame(name = ["Zaki", "Farida"], attributes = [
Dict("age" => 25, "city" => "New York"),
Dict("age" => 30, "city" => "Los Angeles")]);

julia> @unnest_wider(df, attributes)
julia> @chain df @unnest_wider(attributes) @relocate(name, attributes_city, attributes_age)
2×3 DataFrame
Row │ name attributes_city attributes_age
│ String String Int64
Expand Down
54 changes: 54 additions & 0 deletions src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ function parse_escape_function(rhs_expr::Union{Expr,Symbol})
return x
elseif fn isa Symbol && hasproperty(Statistics, fn) && typeof(getproperty(Statistics, fn)) <: Function
return x
# elseif fn isa Symbol && hasproperty(Main, fn) && typeof(getproperty(Main, fn)) <: Function
# return :(Main.$fn($(args...)))
elseif contains(string(fn), r"[^\W0-9]\w*$") # valid variable name
return :($(esc(fn))($(args...)))
else
Expand All @@ -423,6 +425,8 @@ function parse_escape_function(rhs_expr::Union{Expr,Symbol})
return x
elseif fn isa Symbol && hasproperty(Statistics, fn) && typeof(getproperty(Statistics, fn)) <: Function
return x
# elseif fn isa Symbol && hasproperty(Main, fn) && typeof(getproperty(Main, fn)) <: Function
# return :(Main.$fn.($(args...)))
elseif contains(string(fn), r"[^\W0-9]\w*$") # valid variable name
return :($(esc(fn)).($(args...)))
else
Expand Down Expand Up @@ -535,3 +539,53 @@ function parse_blocks(exprs...)
end
return exprs
end

# Not exported
# The pivot_wider helper function requires a vector of symbols when there are
# multiple value columns.
"""
    _parse_values_from(values_from, df_esc)

Rewrite the `values_from` argument of a pivot macro into an expression that
yields the selected column name(s). `df_esc` is the expression standing for the
data frame; it is spliced into the generated code wherever column names have to
be looked up.

Handled forms:

- bare `Symbol`                       → `QuoteNode` of that symbol
- `[a, b]` / `(a, b)`                 → `[:a, :b]` (already quoted entries are kept)
- `starts_with(p)` / `startswith(p)`  → `names(df)[startswith.(String.(names(df)), p)]`
- `ends_with(p)` / `endswith(p)`      → `names(df)[endswith.(String.(names(df)), p)]`
- `a:b`                               → `names(df[:, Between(:a, :b)])`
- `Between(a, b)`                     → `names(df[:, Between(a, b)])`
- `QuoteNode` wrapping an `Expr`      → unwrapped and processed as above

Anything else is returned unchanged.

# Throws
- `ArgumentError` if a selector call has the wrong number of operands
  (for example a stepped range `a:b:c` or `Between(:a)`).
"""
_parse_values_from(values_from, df_esc) = values_from

# A bare column name becomes a quoted symbol.
_parse_values_from(values_from::Symbol, df_esc) = QuoteNode(values_from)

# A quoted expression is unwrapped and parsed; a quoted non-expression is kept.
function _parse_values_from(values_from::QuoteNode, df_esc)
    inner = values_from.value
    return inner isa Expr ? _parse_values_from(inner, df_esc) : values_from
end

function _parse_values_from(values_from::Expr, df_esc)
    head = values_from.head
    args = values_from.args

    # Explicit collections: quote every entry that is not quoted yet.
    if head == :vect || head == :tuple
        return Expr(:vect, (a isa QuoteNode ? a : QuoteNode(a) for a in args)...)
    end

    # Split the expression into a selector name and its operands.
    if head == :call && !isempty(args)
        selector, operands = args[1], args[2:end]
    elseif head == :starts_with || head == :ends_with
        # Hand-built expressions that carry the selector in the head itself.
        selector, operands = head, args
    else
        return values_from
    end

    if selector == :startswith || selector == :starts_with
        pat = _selector_operands(values_from, operands, 1)[1]
        return :(names($df_esc)[startswith.(String.(names($df_esc)), $pat)])

    elseif selector == :endswith || selector == :ends_with
        pat = _selector_operands(values_from, operands, 1)[1]
        return :(names($df_esc)[endswith.(String.(names($df_esc)), $pat)])

    elseif selector == :(:)
        # Plain `first:last` range; the endpoints are bare names and get quoted.
        a, b = _selector_operands(values_from, operands, 2)
        return :(names($df_esc[:, Between($(QuoteNode(a)), $(QuoteNode(b)))]))

    elseif selector == :Between
        # `Between(:first, :last)`; the endpoints are used as written.
        a, b = _selector_operands(values_from, operands, 2)
        return :(names($df_esc[:, Between($a, $b)]))
    end

    return values_from
end

# Return `operands` after checking that exactly `n` were supplied.
function _selector_operands(values_from::Expr, operands, n::Integer)
    if length(operands) != n
        throw(ArgumentError(
            "`values_from = $(values_from)` expects $(n) argument(s), " *
            "got $(length(operands))"))
    end
    return operands
end
96 changes: 86 additions & 10 deletions src/pivots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ $docstring_pivot_wider
"""
macro pivot_wider(df, exprs...)
exprs = parse_blocks(exprs...)

# take the expressions and return arg => value dictionary
interpolated_exprs = parse_interpolation.(exprs)

Expand All @@ -23,18 +22,53 @@ macro pivot_wider(df, exprs...)
arg_dict[:fill] = eval(expr_dict[QuoteNode(:values_fill)])
end

df_expr = quote
unstack(DataFrame($(esc(df))),
$(expr_dict[QuoteNode(:names_from)]),
$(expr_dict[QuoteNode(:values_from)]);
$(arg_dict)...)
end
names_from = expr_dict[QuoteNode(:names_from)]
values_from = expr_dict[QuoteNode(:values_from)]
tidy_cols = parse_tidy(values_from)

if code[]
@info MacroTools.prettify(df_expr)
return quote
if $(tidy_cols) isa Symbol || $(tidy_cols) isa String
unstack($(esc(df)), $names_from, $(tidy_cols); $(arg_dict)...)
else
pivot_wider_multi($(esc(df)), $(names_from), names(($(esc(df))), $(tidy_cols)); $(arg_dict)...)
end
end
end

return(df_expr)
"""
    pivot_wider_multi(df, names_from_raw, values_from; fill = missing)

Widen `df` once for every column listed in `values_from` and concatenate the
widened frames side by side.

# Arguments
- `df`: the long-format data frame.
- `names_from_raw`: the column whose values become new column names; a
  `QuoteNode` is unwrapped first.
- `values_from`: iterable of column names to spread. Each new column is named
  `<key>_<value column>`.

# Keywords
- `fill`: forwarded to `unstack` for combinations that have no value.

# Throws
- `ArgumentError` if a requested column is not in `df`, or if `values_from`
  selects no column at all.
"""
function pivot_wider_multi(df::AbstractDataFrame,
                           names_from_raw,
                           values_from;
                           fill = missing)
    col_names = names(df)

    # Resolve a requested column (Symbol or String) to its name in `df`.
    function lookup(raw)
        idx = findfirst(==(String(raw)), col_names)
        if idx === nothing
            throw(ArgumentError("column `$(raw)` not found in the data frame"))
        end
        return col_names[idx]
    end

    raw_name = names_from_raw isa QuoteNode ? names_from_raw.value : names_from_raw
    name_col = lookup(raw_name)
    val_cols = [lookup(v) for v in values_from]
    if isempty(val_cols)
        throw(ArgumentError("`values_from` must select at least one column"))
    end

    # Every column that is neither the key column nor a value column identifies a row.
    id_cols = setdiff(col_names, vcat([name_col], val_cols))

    result = nothing
    for v in val_cols
        tmp = df[:, unique(vcat(id_cols, [name_col, v]))]
        wide = unstack(tmp, name_col, v; fill = fill)

        # Defensive: drop the key column if it is still present after unstacking.
        if name_col in names(wide)
            select!(wide, Not(name_col))
        end

        # Tag each spread column with the value column it came from.
        rename!(wide, Dict(c => Symbol(string(c), "_", v)
                           for c in setdiff(names(wide), id_cols)))

        if result === nothing
            result = wide
        else
            # Every pass unstacks the same rows by the same id columns, so the
            # frames are combined positionally as produced. Sorting only the
            # later frames would misalign rows whenever `df` is not already
            # ordered by its id columns.
            result = hcat(result, select(wide, Not(id_cols)); makeunique = true)
        end
    end
    return result
end

"""
Expand Down Expand Up @@ -83,3 +117,45 @@ macro pivot_longer(df, exprs...)
return df_expr
end

"""
    parse_values_from(vf, df_esc)

Turn the `values_from` expression `vf` into an expression selecting column
names. `vf` is first passed through `parse_tidy(vf; subset = true)`; the shape
of that result decides what is returned, with `df_esc` spliced in wherever the
data frame is needed.
"""
function parse_values_from(vf, df_esc)
    selector = parse_tidy(vf; subset = true)

    # NOTE(review): the two branches below compare `selector.head` with
    # `:Between` / `:Cols`; confirm that `parse_tidy` really produces such
    # heads rather than `:call` expressions.
    if selector isa Expr
        if selector.head == :Between
            # Between(:a, :b) → names(df[:, Between(:a, :b)])
            return :(names($df_esc[:, $selector]))
        elseif selector.head == :Cols
            inner = selector.args[1]
            if inner isa Expr && inner.head == :call
                fname = inner.args[1]
                if fname == :startswith || fname == :starts_with
                    # prefix match inside Cols()
                    prefix = inner.args[2]
                    return :(names($df_esc)[startswith.(String.(names($df_esc)), $prefix)])
                elseif fname == :endswith || fname == :ends_with
                    # suffix match inside Cols()
                    suffix = inner.args[2]
                    return :(names($df_esc)[endswith.(String.(names($df_esc)), $suffix)])
                end
            end
            # any other Cols() selector is used for indexing directly
            return :(names($df_esc)[$selector])
        end
    end

    # A bare symbol gets quoted; explicit vectors/tuples, QuoteNodes and
    # everything else are handed back untouched.
    return vf isa Symbol ? QuoteNode(vf) : vf
end
Loading