|
9 | 9 | """
|
10 | 10 | $docstring_separate
|
11 | 11 | """
|
12 |
macro separate(df, from, into, sep, args...)
    # Scan the trailing macro arguments for an `extra = ...` assignment;
    # anything else is ignored. "merge" is used when none is given.
    extra = "merge"
    for option in args
        (option isa Expr && option.head == :(=)) || continue
        if option.args[1] == :extra
            extra = option.args[2]
        end
    end

    # The source column is passed to the function as a quoted symbol.
    source_col = QuoteNode(from)

    # Only the first value returned by `parse_interpolation` is used here.
    target, _, _ = parse_interpolation(into)

    # A literal tuple `(a, b)` or vector `[a, b]` of names is turned into a
    # vector of quoted symbols at expansion time.
    is_literal_list = @capture(target, (targets__,)) || @capture(target, [targets__])

    if is_literal_list
        quoted_targets = QuoteNode.(targets)
        into_expr = :[$(quoted_targets...)]
    else
        # Otherwise decide at run time: a Vector{String} is converted to
        # symbols, any other value is passed through unchanged.
        into_expr = quote
            ($target) isa Vector{String} ? Symbol.($target) : $target
        end
    end

    return quote
        separate($(esc(df)), $(source_col), $(into_expr), $(esc(sep)); extra=$(esc(extra)))
    end
end
|
34 | 43 |
|
35 |
# Split the string column `col` of `df` on `sep` into the columns named in `into`.
#
# Arguments
# - `df`:    input DataFrame; it is copied, not modified.
# - `col`:   column whose values are split.
# - `into`:  names of the columns that receive the pieces, in order.
# - `sep`:   separator, either a `String` or a `Regex`.
# - `extra`: what to do when a value yields more pieces than `into` has names:
#            "merge" keeps the unsplit remainder in the last column,
#            "drop" discards the surplus pieces,
#            "warn" discards them and emits a warning,
#            any other value raises an error.
#
# Every name in `into` gets a column; rows with fewer pieces than names are
# padded with `missing`. The original `col` is removed from the result.
function separate(df::DataFrame, col::Symbol, into::Vector{Symbol}, sep::Union{Regex, String}; extra::String = "merge")
    new_df = df[:, :]
    source = new_df[:, col]
    pieces = map(x -> split(x, sep), source)
    n_into = length(into)
    # Guard the reduction so a zero-row frame does not throw.
    max_pieces = isempty(pieces) ? 0 : maximum(length, pieces)

    if n_into < max_pieces
        if extra == "warn"
            @warn "Dropping extra split parts that don't fit into the provided `into` columns."
        elseif extra == "merge"
            # Re-split with a limit so the last piece is the untouched remainder
            # of the string. Unlike joining the surplus pieces with `sep`, this
            # keeps the original delimiter text when `sep` is a Regex.
            pieces = map(x -> split(x, sep; limit = n_into), source)
        elseif extra != "drop"
            error("Not enough names provided in \"into\" for all split columns.")
        end
    end

    # One column per requested name; `safe_getindex` supplies `missing`
    # for rows that have no piece at position `i`.
    for (i, name) in enumerate(into)
        new_df[:, name] = map(x -> safe_getindex(x, i, missing), pieces)
    end

    return select(new_df, Not(col))
end
|
52 | 79 |
|
| 80 | + |
53 | 81 | """
|
54 | 82 | $docstring_unite
|
55 | 83 | """
|
56 |
macro unite(df, new_col, from_cols, sep, args...)
    # Scan the trailing macro arguments for a `remove = ...` assignment;
    # anything else is ignored. `true` is used when none is given.
    remove = true
    for option in args
        (option isa Expr && option.head == :(=)) || continue
        if option.args[1] == :remove
            remove = option.args[2]
        end
    end

    # The new column name is passed to the function as a quoted symbol.
    target_name = QuoteNode(new_col)

    # Only the first value returned by `parse_interpolation` is used; the
    # result is then run through `parse_tidy`.
    source_spec, _, _ = parse_interpolation(from_cols)
    source_spec = parse_tidy(source_spec)

    if @capture(source_spec, (first_col:last_col))
        # Range form: rebuild the `first:last` expression.
        from_cols_expr = :($(first_col):$(last_col))
    elseif @capture(source_spec, (sources__,)) || @capture(source_spec, [sources__])
        # Literal tuple or vector of names: quote each one at expansion time.
        quoted_sources = QuoteNode.(sources)
        from_cols_expr = :[$(quoted_sources...)]
    else
        # Otherwise decide at run time: a Tuple is converted to a vector of
        # symbols, any other value is passed through unchanged.
        from_cols_expr = quote
            ($source_spec) isa Tuple ? collect(Symbol.($source_spec)) : $source_spec
        end
    end

    return quote
        unite($(esc(df)), $target_name, [$(from_cols_expr)], $(esc(sep)); remove=$(esc(remove)))
    end
end
|
79 | 116 |
|
80 |
# Join several columns of `df` row-wise into one string column.
#
# Arguments
# - `df`:           input DataFrame; it is copied, not modified.
# - `new_col_name`: name of the column that receives the joined strings.
# - `columns`:      column selection; its elements are splatted into `Cols`
#                   (a single `Expr` is wrapped in a tuple first).
# - `sep`:          string placed between the joined values (default "_").
# - `remove`:       when true (default), the source columns are dropped
#                   from the result.
#
# `missing` values are skipped when a row is joined. If `new_col_name` is
# itself one of the source columns, the united column is kept even when
# `remove` is true.
function unite(df::DataFrame, new_col_name::Symbol, columns, sep::String="_"; remove::Bool=true)
    new_df = df[:, :]
    cols_expr = columns isa Expr ? (columns,) : columns
    # Despite the variable name, `names` returns the selected column names as strings.
    column_symbols = names(df, Cols(cols_expr...))
    new_df[:, new_col_name] = [join(skipmissing(row), sep) for row in eachrow(df[:, column_symbols])]

    if remove
        # Never drop the column that was just written: when the new name matches
        # a source column, removing all sources would delete the united result.
        united_name = String(new_col_name)
        to_drop = [name for name in column_symbols if name != united_name]
        if !isempty(to_drop)
            new_df = select(new_df, Not(to_drop))
        end
    end

    return new_df
end
|
87 | 130 |
|
| 131 | + |
88 | 132 | """
|
89 | 133 | $docstring_separate_rows
|
90 | 134 | """
|
|
0 commit comments