diff --git a/src/docstrings.jl b/src/docstrings.jl index e038a6f..57de3cc 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -1413,6 +1413,22 @@ julia> @pivot_wider(df_long_missing, names_from = variable, values_from = value, ─────┼───────────────────── 1 │ 1 1 2 2 │ 2 0 4 + +julia> df_mult = DataFrame( + paddockId = [0, 0, 1, 1, 2, 2], + color = repeat([:red, :blue], 3), + count = repeat([3, 4], 3), + weight = [0.2, 0.3, 0.2, 0.3, 0.2, 0.2], + ); + +julia> @pivot_wider(df_mult, names_from = color, values_from = count:weight) +3×5 DataFrame + Row │ paddockId red_count blue_count red_weight blue_weight + │ Int64 Int64? Int64? Float64? Float64? +─────┼─────────────────────────────────────────────────────────── + 1 │ 0 3 4 0.2 0.3 + 2 │ 1 3 4 0.2 0.3 + 3 │ 2 3 4 0.2 0.2 ``` """ @@ -3231,7 +3247,7 @@ julia> df = DataFrame(name = ["Zaki", "Farida"], attributes = [ Dict("age" => 25, "city" => "New York"), Dict("age" => 30, "city" => "Los Angeles")]); -julia> @unnest_wider(df, attributes) +julia> @chain df @unnest_wider(attributes) @relocate(name, attributes_city, attributes_age) 2×3 DataFrame Row │ name attributes_city attributes_age │ String String Int64 diff --git a/src/parsing.jl b/src/parsing.jl index 79a674d..86ed61d 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -409,6 +409,8 @@ function parse_escape_function(rhs_expr::Union{Expr,Symbol}) return x elseif fn isa Symbol && hasproperty(Statistics, fn) && typeof(getproperty(Statistics, fn)) <: Function return x + # elseif fn isa Symbol && hasproperty(Main, fn) && typeof(getproperty(Main, fn)) <: Function + # return :(Main.$fn($(args...))) elseif contains(string(fn), r"[^\W0-9]\w*$") # valid variable name return :($(esc(fn))($(args...))) else @@ -423,6 +425,8 @@ function parse_escape_function(rhs_expr::Union{Expr,Symbol}) return x elseif fn isa Symbol && hasproperty(Statistics, fn) && typeof(getproperty(Statistics, fn)) <: Function return x + # elseif fn isa Symbol && hasproperty(Main, fn) && 
typeof(getproperty(Main, fn)) <: Function + # return :(Main.$fn.($(args...))) elseif contains(string(fn), r"[^\W0-9]\w*$") # valid variable name return :($(esc(fn)).($(args...))) else @@ -535,3 +539,53 @@ function parse_blocks(exprs...) end return exprs end + +# Not exported +# The pivot_wider helper function when there are multiple columns requires +# a vector of symbols. +function _parse_values_from(values_from, df_esc) + if values_from isa Expr && (values_from.head == :vect || values_from.head == :tuple) + quoted = [a isa QuoteNode ? a : QuoteNode(a) for a in values_from.args] + return Expr(:vect, quoted...) + + elseif values_from isa Symbol + return QuoteNode(values_from) + + # starts_with / startswith ------------------------------------------ + elseif values_from isa Expr && values_from.head == :starts_with + pat = values_from.args[1] + return :(names($df_esc)[startswith.(String.(names($df_esc)), $pat)]) + + elseif values_from isa Expr && values_from.head == :call && + (values_from.args[1] == :startswith || values_from.args[1] == :starts_with) + pat = values_from.args[2] + return :(names($df_esc)[startswith.(String.(names($df_esc)), $pat)]) + + # ends_with / endswith ---------------------------------------------- + elseif values_from isa Expr && values_from.head == :ends_with + pat = values_from.args[1] + return :(names($df_esc)[endswith.(String.(names($df_esc)), $pat)]) + + elseif values_from isa Expr && values_from.head == :call && + (values_from.args[1] == :endswith || values_from.args[1] == :ends_with) + pat = values_from.args[2] + return :(names($df_esc)[endswith.(String.(names($df_esc)), $pat)]) + + # plain estimate:moe ---------------------------------------------- + elseif values_from isa Expr && values_from.head == :call && values_from.args[1] == :(:) + a, b = values_from.args[2:3] + return :(names($df_esc[:, Between($(QuoteNode(a)), $(QuoteNode(b)))])) + + # Between(:estimate,:moe) ------------------------------------------- + elseif values_from isa 
Expr && values_from.head == :call && values_from.args[1] == :Between + a, b = values_from.args[2:3] + return :(names($df_esc[:, Between($a, $b)])) + + # if wrapped in QuoteNode, unwrap and recurse ----------------------- + elseif values_from isa QuoteNode && values_from.value isa Expr + return _parse_values_from(values_from.value, df_esc) + + else + return values_from # unchanged + end +end \ No newline at end of file diff --git a/src/pivots.jl b/src/pivots.jl index 0d71f0e..685f84a 100644 --- a/src/pivots.jl +++ b/src/pivots.jl @@ -3,7 +3,6 @@ $docstring_pivot_wider """ macro pivot_wider(df, exprs...) exprs = parse_blocks(exprs...) - # take the expressions and return arg => value dictionary interpolated_exprs = parse_interpolation.(exprs) @@ -23,18 +22,53 @@ macro pivot_wider(df, exprs...) arg_dict[:fill] = eval(expr_dict[QuoteNode(:values_fill)]) end - df_expr = quote - unstack(DataFrame($(esc(df))), - $(expr_dict[QuoteNode(:names_from)]), - $(expr_dict[QuoteNode(:values_from)]); - $(arg_dict)...) - end + names_from = expr_dict[QuoteNode(:names_from)] + values_from = expr_dict[QuoteNode(:values_from)] + tidy_cols = parse_tidy(values_from) - if code[] - @info MacroTools.prettify(df_expr) + return quote + if $(tidy_cols) isa Symbol || $(tidy_cols) isa String + unstack($(esc(df)), $names_from, $(tidy_cols); $(arg_dict)...) + else + pivot_wider_multi($(esc(df)), $(names_from), names(($(esc(df))), $(tidy_cols)); $(arg_dict)...) + end end +end - return(df_expr) +function pivot_wider_multi(df::AbstractDataFrame, + names_from_raw, + values_from; + fill = missing) + # Unstack each values_from column separately, suffix the new columns with that column's name, then hcat the pieces. + raw_name = names_from_raw isa QuoteNode ? 
names_from_raw.value : names_from_raw + name_col = first(col for col in names(df) if String(col) == String(raw_name)) + val_cols = [first(col for col in names(df) if String(col) == String(v)) + for v in values_from] + # Every remaining column is a row key, shared by all of the per-column unstack calls below. + id_cols = setdiff(names(df), vcat([name_col], val_cols)) + + result = nothing + + for (i, v) in enumerate(val_cols) + sel_cols = vcat(id_cols, [name_col, v]) |> unique + tmp = df[:, sel_cols] + wide = unstack(tmp, name_col, v; fill = fill) + if name_col in names(wide) + select!(wide, Not(name_col)) + end + + suffix = String(values_from[i]) + rename!(wide, Dict(c => Symbol(string(c), "_", suffix) + for c in setdiff(names(wide), id_cols))) + + if result === nothing + result = wide + else + # No re-sort here: unstack keeps row keys in first-appearance order, so wide already lines up row for row with result. + result = hcat(result, select(wide, Not(id_cols)); makeunique = true) + end + end + return result end """ @@ -83,3 +117,45 @@ macro pivot_longer(df, exprs...) return df_expr end +function parse_values_from(vf, df_esc) + sel = parse_tidy(vf; subset = true) # let TidierData do most work + + # Between(:a,:b) → names(df[:, Between(:a,:b)]) + if sel isa Expr && sel.head == :Between + return :(names($df_esc[:, $sel])) + + # Cols( … ) ----------------------------------------------- + elseif sel isa Expr && sel.head == :Cols + inner = sel.args[1] + + # starts_with / startswith inside Cols() + if inner isa Expr && inner.head == :call && + (inner.args[1] == :startswith || inner.args[1] == :starts_with) + pat = inner.args[2] + return :(names($df_esc)[startswith.(String.(names($df_esc)), $pat)]) + + # ends_with / endswith inside Cols() + elseif inner isa Expr && inner.head == :call && + (inner.args[1] == :endswith || inner.args[1] == :ends_with) + pat = inner.args[2] + return :(names($df_esc)[endswith.(String.(names($df_esc)), $pat)]) + + # any other Cols() selector → use it directly + else + return :(names($df_esc)[$sel]) + end + end + + # vectors already explicit, leave as-is + if (vf isa Expr && (vf.head == :vect || vf.head == :tuple)) || vf isa 
QuoteNode + return vf + end + + # bare Symbol → QuoteNode(Symbol) + if vf isa Symbol + return QuoteNode(vf) + end + + # fallback (rare) + return vf +end \ No newline at end of file