@@ -117,9 +117,9 @@ Compat.hasproperty(df::AbstractDataFrame, s::AbstractString) = haskey(index(df),
117117
118118"""
119119 rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
120- makeunique::Bool=false)
120+ makeunique::Bool=false, dupcol::Symbol=:error )
121121 rename!(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
122- makeunique::Bool=false)
122+ makeunique::Bool=false, dupcol::Symbol=:error )
123123 rename!(df::AbstractDataFrame, (from => to)::Pair...)
124124 rename!(df::AbstractDataFrame, d::AbstractDict)
125125 rename!(df::AbstractDataFrame, d::AbstractVector{<:Pair})
@@ -179,9 +179,9 @@ julia> rename!(df, [:a, :b, :c])
179179 1 │ 1 2 3
180180
181181julia> rename!(df, [:a, :b, :a])
182- ERROR: ArgumentError: Duplicate variable names: :a. Pass makeunique=true to make them unique using a suffix automatically.
182+ ERROR: ArgumentError: Duplicate variable names: :a. Pass dupcol=:makeunique to make them unique using a suffix automatically.
183183
184- julia> rename!(df, [:a, :b, :a], makeunique=true )
184+ julia> rename!(df, [:a, :b, :a], dupcol=:makeunique )
1851851×3 DataFrame
186186 Row │ a b a_1
187187 │ Int64 Int64 Int64
@@ -197,16 +197,16 @@ julia> rename!(uppercase, df)
197197```
198198"""
199199function rename! (df:: AbstractDataFrame , vals:: AbstractVector{Symbol} ;
200- makeunique:: Bool = false )
201- rename! (index (df), vals, makeunique= makeunique)
200+ makeunique:: Bool = false , dupcol :: Symbol = :error )
201+ rename! (index (df), vals, makeunique= makeunique, dupcol = dupcol )
202202 # renaming columns of SubDataFrame has to clean non-note metadata in its parent
203203 _drop_all_nonnote_metadata! (parent (df))
204204 return df
205205end
206206
207207function rename! (df:: AbstractDataFrame , vals:: AbstractVector{<:AbstractString} ;
208- makeunique:: Bool = false )
209- rename! (index (df), Symbol .(vals), makeunique= makeunique)
208+ makeunique:: Bool = false , dupcol :: Symbol = :error )
209+ rename! (index (df), Symbol .(vals), makeunique= makeunique, dupcol = dupcol )
210210 # renaming columns of SubDataFrame has to clean non-note metadata in its parent
211211 _drop_all_nonnote_metadata! (parent (df))
212212 return df
@@ -353,9 +353,9 @@ julia> rename(uppercase, df)
353353```
354354"""
355355rename (df:: AbstractDataFrame , vals:: AbstractVector{Symbol} ;
356- makeunique:: Bool = false ) = rename! (copy (df), vals, makeunique= makeunique)
356+ makeunique:: Bool = false , dupcol :: Symbol = :error ) = rename! (copy (df), vals, makeunique= makeunique, dupcol = dupcol )
357357rename (df:: AbstractDataFrame , vals:: AbstractVector{<:AbstractString} ;
358- makeunique:: Bool = false ) = rename! (copy (df), vals, makeunique= makeunique)
358+ makeunique:: Bool = false , dupcol :: Symbol = :error ) = rename! (copy (df), vals, makeunique= makeunique, dupcol = dupcol )
359359rename (df:: AbstractDataFrame , args... ) = rename! (copy (df), args... )
360360rename (f:: Function , df:: AbstractDataFrame ) = rename! (f, copy (df))
361361
@@ -1536,13 +1536,20 @@ end
15361536
15371537"""
15381538 hcat(df::AbstractDataFrame...;
1539- makeunique::Bool=false, copycols::Bool=true)
1539+ makeunique::Bool=false, dupcol::Symbol=:error, copycols::Bool=true)
15401540
15411541Horizontally concatenate data frames.
15421542
15431543If `makeunique=false` (the default) column names of passed objects must be unique.
15441544If `makeunique=true` then duplicate column names will be suffixed
15451545with `_i` (`i` starting at 1 for the first duplicate).
1546+ The `makeunique` keyword argument is deprecated in favor of `dupcol`.
1547+
1548+ If `dupcol=:error` (the default) then column names of passed objects must be unique.
1549+ If `dupcol=:makeunique` then duplicate column names will be suffixed
1550+ with `_i` (`i` starting at 1 for the first duplicate).
1551+ If `dupcol=:update` then columns with duplicate names will be combined, with the left-hand
1552+ column overwritten by non-missing values from the right-hand column(s).
15461553
15471554If `copycols=true` (the default) then the `DataFrame` returned by `hcat` will
15481555contain copied columns from the source data frames.
@@ -1575,7 +1582,7 @@ julia> df2 = DataFrame(A=4:6, B=4:6)
15751582 2 │ 5 5
15761583 3 │ 6 6
15771584
1578- julia> df3 = hcat(df1, df2, makeunique=true )
1585+ julia> df3 = hcat(df1, df2, dupcol=:makeunique )
157915863×4 DataFrame
15801587 Row │ A B A_1 B_1
15811588 │ Int64 Int64 Int64 Int64
@@ -1587,32 +1594,32 @@ julia> df3 = hcat(df1, df2, makeunique=true)
15871594julia> df3.A === df1.A
15881595false
15891596
1590- julia> df3 = hcat(df1, df2, makeunique=true , copycols=false);
1597+ julia> df3 = hcat(df1, df2, dupcol=:makeunique , copycols=false);
15911598
15921599julia> df3.A === df1.A
15931600true
15941601```
15951602"""
1596- function Base. hcat (df:: AbstractDataFrame ; makeunique:: Bool = false , copycols:: Bool = true )
1603+ function Base. hcat (df:: AbstractDataFrame ; makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true )
15971604 df = DataFrame (df, copycols= copycols)
15981605 _drop_all_nonnote_metadata! (df)
15991606 return df
16001607end
16011608
16021609# TODO : after deprecation remove AbstractVector methods
1603- Base. hcat (df:: AbstractDataFrame , x:: AbstractVector ; makeunique:: Bool = false , copycols:: Bool = true ) =
1604- hcat! (DataFrame (df, copycols= copycols), x, makeunique= makeunique, copycols= copycols)
1605- Base. hcat (x:: AbstractVector , df:: AbstractDataFrame ; makeunique:: Bool = false , copycols:: Bool = true ) =
1606- hcat! (x, df, makeunique= makeunique, copycols= copycols)
1610+ Base. hcat (df:: AbstractDataFrame , x:: AbstractVector ; makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
1611+ hcat! (DataFrame (df, copycols= copycols), x, makeunique= makeunique, dupcol = dupcol, copycols= copycols)
1612+ Base. hcat (x:: AbstractVector , df:: AbstractDataFrame ; makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
1613+ hcat! (x, df, makeunique= makeunique, dupcol = dupcol, copycols= copycols)
16071614Base. hcat (df1:: AbstractDataFrame , df2:: AbstractDataFrame ;
1608- makeunique:: Bool = false , copycols:: Bool = true ) =
1615+ makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
16091616 hcat! (DataFrame (df1, copycols= copycols), df2,
1610- makeunique= makeunique, copycols= copycols)
1617+ makeunique= makeunique, dupcol = dupcol, copycols= copycols)
16111618Base. hcat (df:: AbstractDataFrame , x:: Union{AbstractVector, AbstractDataFrame} ,
16121619 y:: Union{AbstractVector, AbstractDataFrame} ...;
1613- makeunique:: Bool = false , copycols:: Bool = true ) =
1614- hcat! (hcat (df, x, makeunique= makeunique, copycols= copycols), y... ,
1615- makeunique= makeunique, copycols= copycols)
1620+ makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
1621+ hcat! (hcat (df, x, makeunique= makeunique, dupcol = dupcol, copycols= copycols), y... ,
1622+ makeunique= makeunique, dupcol = dupcol, copycols= copycols)
16161623
16171624"""
16181625 vcat(dfs::AbstractDataFrame...;
@@ -2870,6 +2877,10 @@ const INSERTCOLS_ARGUMENTS =
28702877 - `makeunique` : defines what to do if `name` already exists in `df`;
28712878 if it is `false` an error will be thrown; if it is `true` a new unique name will
28722879 be generated by adding a suffix
2880+ - `dupcol` : defines what to do if `name` already exists in `df`;
2881+ if it is `:error` an error will be thrown; if it is `:makeunique` a new unique name will
2882+ be generated by adding a suffix; if it is `:update` then the existing column will be
2883+ updated with the non-missing values of the inserted column
28732884 - `copycols` : whether vectors passed as columns should be copied
28742885
28752886 If `val` is an `AbstractRange` then the result of `collect(val)` is inserted.
@@ -2891,7 +2902,7 @@ const INSERTCOLS_ARGUMENTS =
28912902
28922903"""
28932904 insertcols(df::AbstractDataFrame[, col], (name=>val)::Pair...;
2894- after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
2905+ after::Bool=false, makeunique::Bool=false, dupcol::Symbol=:error, copycols::Bool=true)
28952906
28962907Insert a column into a copy of `df` data frame using the [`insertcols!`](@ref)
28972908function and return the newly created data frame.
@@ -2922,7 +2933,7 @@ julia> insertcols(df, 1, :b => 'a':'c')
29222933 2 │ b 2
29232934 3 │ c 3
29242935
2925- julia> insertcols(df, :c => 2:4, :c => 3:5, makeunique=true )
2936+ julia> insertcols(df, :c => 2:4, :c => 3:5, dupcol=:makeunique )
292629373×3 DataFrame
29272938 Row │ a c c_1
29282939 │ Int64 Int64 Int64
@@ -2942,13 +2953,13 @@ julia> insertcols(df, :a, :d => 7:9, after=true)
29422953```
29432954"""
29442955insertcols (df:: AbstractDataFrame , args... ;
2945- after:: Bool = false , makeunique:: Bool = false , copycols:: Bool = true ) =
2956+ after:: Bool = false , makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
29462957 insertcols! (copy (df), args... ;
2947- after= after, makeunique= makeunique, copycols= copycols)
2958+ after= after, makeunique= makeunique, dupcol = dupcol, copycols= copycols)
29482959
29492960"""
29502961 insertcols!(df::AbstractDataFrame[, col], (name=>val)::Pair...;
2951- after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
2962+ after::Bool=false, makeunique::Bool=false, dupcol::Symbol=:error, copycols::Bool=true)
29522963
29532964Insert a column into a data frame in place. Return the updated data frame.
29542965
@@ -2979,7 +2990,7 @@ julia> insertcols!(df, 1, :b => 'a':'c')
29792990 2 │ b 2
29802991 3 │ c 3
29812992
2982- julia> insertcols!(df, 2, :c => 2:4, :c => 3:5, makeunique=true )
2993+ julia> insertcols!(df, 2, :c => 2:4, :c => 3:5, dupcol=:makeunique )
298329943×4 DataFrame
29842995 Row │ b c c_1 a
29852996 │ Char Int64 Int64 Int64
@@ -2999,7 +3010,10 @@ julia> insertcols!(df, :b, :d => 7:9, after=true)
29993010```
30003011"""
30013012function insertcols! (df:: AbstractDataFrame , col:: ColumnIndex , name_cols:: Pair{Symbol} ...;
3002- after:: Bool = false , makeunique:: Bool = false , copycols:: Bool = true )
3013+ after:: Bool = false , makeunique:: Bool = false , dupcol:: Symbol = :error , copycols:: Bool = true )
3014+
3015+ dupcol = _dupcol (dupcol, makeunique)
3016+
30033017 if ! is_column_insertion_allowed (df)
30043018 throw (ArgumentError (" insertcols! is only supported for DataFrame, or for " *
30053019 " SubDataFrame created with `:` as column selector" ))
@@ -3025,15 +3039,15 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
30253039 " $(ncol (df)) columns at index $col_ind " ))
30263040 end
30273041
3028- if ! makeunique
3042+ if dupcol == :error
30293043 if ! allunique (first .(name_cols))
30303044 throw (ArgumentError (" Names of columns to be inserted into a data frame " *
3031- " must be unique when `makeunique=true `" ))
3045+ " must be unique when `dupcol=:error `" ))
30323046 end
30333047 for (n, _) in name_cols
30343048 if hasproperty (df, n)
30353049 throw (ArgumentError (" Column $n is already present in the data frame " *
3036- " which is not allowed when `makeunique=true `" ))
3050+ " which is not allowed when `dupcol=:error `" ))
30373051 end
30383052 end
30393053 end
@@ -3103,19 +3117,28 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
31033117 dfp[! , name] = item_new
31043118 else
31053119 if hasproperty (dfp, name)
3106- @assert makeunique
3107- k = 1
3108- while true
3109- nn = Symbol (" $(name) _$k " )
3110- if ! hasproperty (dfp, nn)
3111- name = nn
3112- break
3120+ if dupcol == :makeunique
3121+ k = 1
3122+ while true
3123+ nn = Symbol (" $(name) _$k " )
3124+ if ! hasproperty (dfp, nn)
3125+ name = nn
3126+ break
3127+ end
3128+ k += 1
31133129 end
3114- k += 1
3130+ insert! (index (dfp), col_ind, name)
3131+ insert! (_columns (dfp), col_ind, item_new)
3132+ else
3133+ @assert dupcol == :update
3134+ # Just update without adding to index
3135+ dfp[! , name] = _update_missing .(dfp[! , name], item_new)
3136+ col_ind -= 1
31153137 end
3138+ else
3139+ insert! (index (dfp), col_ind, name)
3140+ insert! (_columns (dfp), col_ind, item_new)
31163141 end
3117- insert! (index (dfp), col_ind, name)
3118- insert! (_columns (dfp), col_ind, item_new)
31193142 end
31203143 col_ind += 1
31213144 end
@@ -3134,22 +3157,22 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
31343157end
31353158
31363159insertcols! (df:: AbstractDataFrame , col:: ColumnIndex , name_cols:: Pair{<:AbstractString} ...;
3137- after:: Bool = false , makeunique:: Bool = false , copycols:: Bool = true ) =
3160+ after:: Bool = false , makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
31383161 insertcols! (df, col, (Symbol (n) => v for (n, v) in name_cols). .. ,
3139- after= after, makeunique= makeunique, copycols= copycols)
3162+ after= after, makeunique= makeunique, dupcol = dupcol, copycols= copycols)
31403163
31413164insertcols! (df:: AbstractDataFrame , name_cols:: Pair{Symbol} ...;
3142- after:: Bool = false , makeunique:: Bool = false , copycols:: Bool = true ) =
3165+ after:: Bool = false , makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
31433166 insertcols! (df, ncol (df)+ 1 , name_cols... , after= after,
3144- makeunique= makeunique, copycols= copycols)
3167+ makeunique= makeunique, dupcol = dupcol, copycols= copycols)
31453168
31463169insertcols! (df:: AbstractDataFrame , name_cols:: Pair{<:AbstractString} ...;
3147- after:: Bool = false , makeunique:: Bool = false , copycols:: Bool = true ) =
3170+ after:: Bool = false , makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true ) =
31483171 insertcols! (df, (Symbol (n) => v for (n, v) in name_cols). .. ,
3149- after= after, makeunique= makeunique, copycols= copycols)
3172+ after= after, makeunique= makeunique, dupcol = dupcol, copycols= copycols)
31503173
31513174function insertcols! (df:: AbstractDataFrame , col:: ColumnIndex ; after:: Bool = false ,
3152- makeunique:: Bool = false , copycols:: Bool = true )
3175+ makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true )
31533176 if col isa SymbolOrString
31543177 col_ind = Int (columnindex (df, col))
31553178 if col_ind == 0
@@ -3173,7 +3196,7 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex; after::Bool=false,
31733196end
31743197
31753198function insertcols! (df:: AbstractDataFrame ; after:: Bool = false ,
3176- makeunique:: Bool = false , copycols:: Bool = true )
3199+ makeunique:: Bool = false , dupcol :: Symbol = :error , copycols:: Bool = true )
31773200 _drop_all_nonnote_metadata! (parent (df))
31783201 return df
31793202end
0 commit comments