Project.toml: 3 additions & 0 deletions
@@ -6,6 +6,7 @@ version = "1.5.0"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
InvertedIndices = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -22,6 +23,7 @@ SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
@@ -35,6 +37,7 @@ Missings = "0.4.2, 1"
PooledArrays = "1.4.2"
PrettyTables = "2.1"
Reexport = "0.1, 0.2, 1"
SentinelArrays = "1.2"
ShiftedArrays = "1, 2"
SnoopPrecompile = "1"
SortingAlgorithms = "0.1, 0.2, 0.3, 1"
docs/src/man/basics.md: 39 additions & 5 deletions
@@ -16,7 +16,7 @@ or
```julia
julia> ] # ']' should be pressed

(@v1.6) pkg> add DataFrames
(@v1.9) pkg> add DataFrames
```

If you want to make sure everything works as expected you can run the tests
@@ -35,9 +35,9 @@ you have installed with the `status` command.
```julia
julia> ]

(@v1.6) pkg> status DataFrames
Status `C:\Users\TeAmp0is0N\.julia\environments\v1.6\Project.toml`
[a93c6f00] DataFrames v1.1.1
(@v1.9) pkg> status DataFrames
Status `~\v1.6\Project.toml`
[a93c6f00] DataFrames v1.5.0
```

Throughout the rest of the tutorial we will assume that you have installed the
@@ -52,6 +52,40 @@ The most fundamental type provided by DataFrames.jl is `DataFrame`, where
typically each row is interpreted as an observation and each column as a
feature.

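For example, the following minimal snippet (illustrative only; the column names and values below are made up and are not part of the manual) builds a small data frame in which each row is one observation and each column is one feature:

```julia
using DataFrames

# each keyword argument becomes a column (feature);
# each row across the columns is one observation
df = DataFrame(id=[1, 2, 3], age=[25, 31, 42])

nrow(df)  # number of observations (rows): 3
ncol(df)  # number of features (columns): 2
```
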
!!! note "Advanced installation configuration"

    **Advanced installation settings.**
    DataFrames.jl puts in extra time and effort when the package is being built
    (precompiled) to make sure it is more responsive when you are using it.
    However, in some scenarios, users might want to avoid this extra
    precompilation effort to reduce the time needed to build the package and
    later to load it. To disable precompilation of DataFrames.jl in your current
    project you need to install the
    [SnoopPrecompile.jl](https://timholy.github.io/SnoopCompile.jl/stable/snoop_pc/)
    and [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl)
    packages and then run the following code:
    ```julia
    using SnoopPrecompile, Preferences
    Preferences.set_preferences!(SnoopPrecompile,
        "skip_precompile" => union(Preferences.load_preference(SnoopPrecompile,
                                                               "skip_precompile",
                                                               String[]),
                                   ["DataFrames"]);
        force=true)
    ```
    If you later want to re-enable precompilation of DataFrames.jl, you can do so
    with the following commands:
    ```julia
    using SnoopPrecompile, Preferences
    Preferences.set_preferences!(SnoopPrecompile,
        "skip_precompile" =>
            filter(!=("DataFrames"),
                   Preferences.load_preference(SnoopPrecompile,
                                               "skip_precompile",
                                               String[]));
        force=true)
    ```

## Constructors and Basic Utility Functions

### Constructors
@@ -1785,7 +1819,7 @@ in them:
julia> select(german, Not(["Age", "Saving accounts", "Checking account",
"Credit amount", "Purpose"]))
1000×5 DataFrame
Row │ id Sex Job Housing Duration
Row │ id Sex Job Housing Duration
│ Int64 String7 Int64 String7 Int64
──────┼──────────────────────────────────────────
1 │ 0 male 2 own 6
src/DataFrames.jl: 2 additions & 0 deletions
@@ -11,6 +11,8 @@ using PrettyTables
using Random
using Tables: ByRow
import SnoopPrecompile
import SentinelArrays
import InlineStrings

import DataAPI,
DataAPI.allcombinations,
src/abstractdataframe/abstractdataframe.jl: 6 additions & 0 deletions
@@ -1892,6 +1892,12 @@ function Base.reduce(::typeof(vcat),
return res
end

# definition needed to avoid dispatch ambiguity
Base.reduce(::typeof(vcat),
            dfs::SentinelArrays.ChainedVector{T, A} where {T<:AbstractDataFrame,
                                                           A<:AbstractVector{T}}) =
    reduce(vcat, collect(AbstractDataFrame, dfs))
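# Illustrative sketch (assumes SentinelArrays.jl is loaded): a `ChainedVector`
# whose elements are data frames matches both the `reduce(vcat, ...)` method for
# vectors of data frames defined above and, presumably, the `reduce(vcat, ...)`
# specialization that SentinelArrays.jl provides for `ChainedVector`, so without
# this extra method Julia would report a dispatch ambiguity. Usage mirrors the
# test added in test/dataframe.jl:
#
#     dfs = SentinelArrays.ChainedVector([[DataFrame(a=1)], [DataFrame(a=2)]])
#     reduce(vcat, dfs)  # == DataFrame(a=1:2)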

function _vcat(dfs::AbstractVector{AbstractDataFrame};
cols::Union{Symbol, AbstractVector{Symbol},
AbstractVector{<:AbstractString}}=:setequal)
src/other/precompile.jl: 41 additions & 5 deletions
@@ -11,8 +11,8 @@ SnoopPrecompile.@precompile_all_calls begin
combine(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
transform(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
groupby(df, :a)
groupby(df, :q)
groupby(df, :p)
groupby(df, :q)
gdf = groupby(df, :b)
combine(gdf, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
transform(gdf, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
@@ -22,16 +22,52 @@ SnoopPrecompile.@precompile_all_calls begin
outerjoin(df, df, on=:a, makeunique=true)
outerjoin(df, df, on=:b, makeunique=true)
outerjoin(df, df, on=:c, makeunique=true)
semijoin(df, df, on=:a)
semijoin(df, df, on=:b)
semijoin(df, df, on=:c)
leftjoin!(df, DataFrame(a=[2, 5, 3, 1, 0]), on=:a)
leftjoin!(df, DataFrame(b=["a", "b", "c", "d", "e"]), on=:b)
leftjoin!(df, DataFrame(c=1:5), on=:c)
reduce(vcat, [df, df])
show(IOBuffer(), df)
subset(df, :q)
@view df[1:3, :]
subset!(copy(df), :q)
df[:, 1:2]
df[1:2, :]
df[1:2, 1:2]
@view df[:, 1:2]
@view df[1:2, :]
@view df[1:2, 1:2]
transform!(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
deleteat!(df, 1)
append!(df, copy(df))
push!(df, copy(df[1, :]))
eachrow(df)
eachcol(df)
empty(df)
empty!(copy(df))
filter(:q => identity, df)
filter!(:q => identity, df)
first(df)
last(df)
hcat(df, df, makeunique=true)
issorted(df)
pop!(df)
popfirst!(df)
repeat(df, 2)
reverse(df)
reverse!(df)
unique(df, :a)
unique!(df, :a)
wide = DataFrame(id=1:6,
                 a=repeat(1:3, inner=2),
                 b=repeat(1.0:2.0, inner=3),
                 c=repeat(1.0:1.0, inner=6),
                 d=repeat(1.0:3.0, inner=2))
long = stack(wide)
unstack(long)
unstack(long, :variable, :value, combine=sum)
flatten(DataFrame(a=[[1, 2], [3, 4]], b=[1, 2]), :a)
dropmissing(DataFrame(a=[1, 2, 3, missing], b=["a", missing, "c", "d"]))
df = DataFrame(rand(20, 2), :auto)
df.id = repeat(1:2, 10)
combine(df, AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
combine(groupby(df, :id), AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
end
test/dataframe.jl: 5 additions & 0 deletions
@@ -1892,6 +1892,11 @@ end
DataFrame(c=[missing, missing]))
end

@testset "vcat ChainedVector ambiguity" begin
dfs = DataFrames.SentinelArrays.ChainedVector([[DataFrame(a=1)], [DataFrame(a=2)]])
@test reduce(vcat, dfs) == DataFrame(a=1:2)
end

@testset "names for Type, predicate + standard tests of cols" begin
df_long = DataFrame(a1=1:3, a2=[1, missing, 3],
b1=1.0:3.0, b2=[1.0, missing, 3.0],