Skip to content
This repository was archived by the owner on May 21, 2022. It is now read-only.

Commit bbec1b2

Browse files
authored
Support DataFrames 0.21 (#58)
Note: DataFrames 0.18 does not support the new `!` indexing syntax (e.g. `df[!, col]`).
1 parent 5b61638 commit bbec1b2

File tree

5 files changed

+42
-44
lines changed

5 files changed

+42
-44
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1313
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1414

1515
[compat]
16-
DataFrames = "0.17, 0.18, 0.19, 0.20"
16+
DataFrames = "0.19, 0.20, 0.21"
1717
LearnBase = "0.2, 0.3"
1818
MLDataPattern = "0.5"
1919
MLLabelUtils = "0.4, 0.5"

src/MLDataUtils.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ using StatsBase
44
using LearnBase
55
using MLLabelUtils
66
using MLDataPattern
7-
using DataFrames
7+
using DataFrames: DataFrames, AbstractDataFrame, DataFrameRow, eachcol
88

99
using LearnBase: ObsDimension
1010
import LearnBase: nobs, getobs, getobs!, datasubset, default_obsdim

src/datapattern.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
_throw_table_error() = throw(ArgumentError("Please specify the column that contains the targets explicitly, or provide a target-extraction-function as first parameter. see parameter 'f' in ?targets."))
22

3-
import DataFrames: DataFrames, AbstractDataFrame, SubDataFrame
4-
53
# required data container interface
64
LearnBase.nobs(dt::AbstractDataFrame) = DataFrames.nrow(dt)
75
LearnBase.getobs(dt::AbstractDataFrame, idx) = dt[idx,:]

src/feature_scaling.jl

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ end
9292
function center!(D::AbstractDataFrame)
9393
μ_vec = Float64[]
9494

95-
flt = Bool[T <: Real for T in eltypes(D)]
96-
for colname in names(D)[flt]
97-
μ = mean(D[colname])
95+
flt = Bool[T <: Real for T in eltype.(eachcol(D))]
96+
for colname in propertynames(D)[flt]
97+
μ = mean(D[!, colname])
9898
center!(D, colname, μ)
9999
push!(μ_vec, μ)
100100
end
@@ -104,8 +104,8 @@ end
104104
function center!(D::AbstractDataFrame, colnames::AbstractVector{Symbol})
105105
μ_vec = Float64[]
106106
for colname in colnames
107-
if eltype(D[colname]) <: Real
108-
μ = mean(D[colname])
107+
if eltype(D[!, colname]) <: Real
108+
μ = mean(D[!, colname])
109109
if ismissing(μ)
110110
@warn("Column \"$colname\" contains missing values, skipping rescaling of this column!")
111111
continue
@@ -121,7 +121,7 @@ end
121121

122122
function center!(D::AbstractDataFrame, colnames::AbstractVector{Symbol}, μ::AbstractVector)
123123
for (icol, colname) in enumerate(colnames)
124-
if eltype(D[colname]) <: Real
124+
if eltype(D[!, colname]) <: Real
125125
center!(D, colname, μ[icol])
126126
else
127127
@warn("Skipping \"$colname\", centering only valid for columns of type T <: Real.")
@@ -131,15 +131,15 @@ function center!(D::AbstractDataFrame, colnames::AbstractVector{Symbol}, μ::Abs
131131
end
132132

133133
function center!(D::AbstractDataFrame, colname::Symbol, μ)
134-
if any(ismissing, D[colname])
134+
if any(ismissing, D[!, colname])
135135
@warn("Column \"$colname\" contains missing values, skipping centering on this column!")
136136
else
137-
newcol::Vector{Float64} = convert(Vector{Float64}, D[colname])
137+
newcol::Vector{Float64} = convert(Vector{Float64}, D[!, colname])
138138
nobs = length(newcol)
139139
@inbounds for i in eachindex(newcol)
140140
newcol[i] -= μ
141141
end
142-
D[colname] = newcol
142+
D[!, colname] = newcol
143143
end
144144
μ
145145
end
@@ -243,10 +243,10 @@ function rescale!(D::AbstractDataFrame)
243243
μ_vec = Float64[]
244244
σ_vec = Float64[]
245245

246-
flt = Bool[T <: Real for T in eltypes(D)]
247-
for colname in names(D)[flt]
248-
μ = mean(D[colname])
249-
σ = std(D[colname])
246+
flt = Bool[T <: Real for T in eltype.(eachcol(D))]
247+
for colname in propertynames(D)[flt]
248+
μ = mean(D[!, colname])
249+
σ = std(D[!, colname])
250250
rescale!(D, colname, μ, σ)
251251
push!(μ_vec, μ)
252252
push!(σ_vec, σ)
@@ -258,9 +258,9 @@ function rescale!(D::AbstractDataFrame, colnames::Vector{Symbol})
258258
μ_vec = Float64[]
259259
σ_vec = Float64[]
260260
for colname in colnames
261-
if eltype(D[colname]) <: Real
262-
μ = mean(D[colname])
263-
σ = std(D[colname])
261+
if eltype(D[!, colname]) <: Real
262+
μ = mean(D[!, colname])
263+
σ = std(D[!, colname])
264264
if ismissing(μ)
265265
@warn("Column \"$colname\" contains missing values, skipping rescaling of this column!")
266266
continue
@@ -277,7 +277,7 @@ end
277277

278278
function rescale!(D::AbstractDataFrame, colnames::Vector{Symbol}, μ::AbstractVector, σ::AbstractVector)
279279
for (icol, colname) in enumerate(colnames)
280-
if eltype(D[colname]) <: Real
280+
if eltype(D[!, colname]) <: Real
281281
rescale!(D, colname, μ[icol], σ[icol])
282282
else
283283
@warn("Skipping \"$colname\", rescaling only valid for columns of type T <: Real.")
@@ -287,16 +287,16 @@ function rescale!(D::AbstractDataFrame, colnames::Vector{Symbol}, μ::AbstractVe
287287
end
288288

289289
function rescale!(D::AbstractDataFrame, colname::Symbol, μ, σ)
290-
if any(ismissing, D[colname])
290+
if any(ismissing, D[!, colname])
291291
@warn("Column \"$colname\" contains missing values, skipping rescaling of this column!")
292292
else
293293
σ_div = σ == 0 ? one(σ) : σ
294-
newcol::Vector{Float64} = convert(Vector{Float64}, D[colname])
294+
newcol::Vector{Float64} = convert(Vector{Float64}, D[!, colname])
295295
nobs = length(newcol)
296296
@inbounds for i in eachindex(newcol)
297297
newcol[i] = (newcol[i] - μ) / σ_div
298298
end
299-
D[colname] = newcol
299+
D[!, colname] = newcol
300300
end
301301
μ, σ
302302
end

test/tst_feature_scaling.jl

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ e_x, _ = noisy_sin(50; noise = 0.)
22
e_X = expand_poly(e_x, degree = 5)
33
df = DataFrame(A=rand(10), B=collect(1:10), C=[string(x) for x in 1:10])
44
df_na = deepcopy(df)
5-
df_na[:A] = allowmissing(df_na[:A])
5+
df_na[!, :A] = allowmissing(df_na[!, :A])
66
df_na[1, :A] = missing
77

88
@testset "Test expand_poly" begin
@@ -65,34 +65,34 @@ end
6565

6666
# Center DataFrame
6767
D = copy(df)
68-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
68+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
6969
mu = center!(D)
7070
@test length(mu) == 2
7171
@test abs(sum(mu .- mu_check)) <= 10e-10
7272

7373
D = copy(df)
74-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
74+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
7575
mu = center!(D, [:A, :B])
7676
@test abs(sum(mu .- mu_check)) <= 10e-10
7777

7878
D = copy(df)
79-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
79+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
8080
mu = center!(D, [:A, :B], mu_check)
81-
@test abs(sum([mean(D[colname]) for colname in names(D)[1:2]])) <= 10e-10
81+
@test abs(sum([mean(D[!, colname]) for colname in names(D)[1:2]])) <= 10e-10
8282

8383
# skip columns that contain missing values
8484
D = copy(df_na)
8585
mu = center!(D, [:A, :B])
8686
@test ismissing(D[1, :A])
8787
@test all(D[2:end, :A] .== df_na[2:end, :A])
88-
@test abs(mean(D[:B])) < 10e-10
88+
@test abs(mean(D[!, :B])) < 10e-10
8989

9090
D = copy(df_na)
91-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
91+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
9292
mu = center!(D, [:A, :B], mu_check)
9393
@test ismissing(D[1, :A])
9494
@test all(D[2:end, :A] .== df_na[2:end, :A])
95-
@test abs(mean(D[:B])) < 10e-10
95+
@test abs(mean(D[!, :B])) < 10e-10
9696

9797
# Rescale Vector
9898
xa = copy(e_x)
@@ -146,33 +146,33 @@ end
146146

147147
D = copy(df)
148148
mu, sigma = rescale!(D)
149-
@test abs(sum([mean(D[colname]) for colname in names(D)[1:2]])) <= 10e-10
150-
@test mean([std(D[colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
149+
@test abs(sum([mean(D[!, colname]) for colname in names(D)[1:2]])) <= 10e-10
150+
@test mean([std(D[!, colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
151151

152152
D = copy(df)
153153
mu, sigma = rescale!(D, [:A, :B])
154-
@test abs(sum([mean(D[colname]) for colname in names(D)[1:2]])) <= 10e-10
155-
@test mean([std(D[colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
154+
@test abs(sum([mean(D[!, colname]) for colname in names(D)[1:2]])) <= 10e-10
155+
@test mean([std(D[!, colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
156156

157157
D = copy(df)
158-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
159-
sigma_check = [std(D[colname]) for colname in names(D)[1:2]]
158+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
159+
sigma_check = [std(D[!, colname]) for colname in names(D)[1:2]]
160160
mu, sigma = rescale!(D, [:A, :B], mu_check, sigma_check)
161-
@test abs(sum([mean(D[colname]) for colname in names(D)[1:2]])) <= 10e-10
162-
@test mean([std(D[colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
161+
@test abs(sum([mean(D[!, colname]) for colname in names(D)[1:2]])) <= 10e-10
162+
@test mean([std(D[!, colname]) for colname in names(D)[1:2]]) - 1 <= 10e-10
163163

164164
# skip columns that contain missing values
165165
D = copy(df_na)
166166
mu, sigma = rescale!(D, [:A, :B])
167167
@test ismissing(D[1, :A])
168168
@test all(D[2:end, :A] .== df_na[2:end, :A])
169-
@test abs(mean(D[:B])) < 10e-10
170-
@test abs(std(D[:B])) - 1 < 10e-10
169+
@test abs(mean(D[!, :B])) < 10e-10
170+
@test abs(std(D[!, :B])) - 1 < 10e-10
171171

172172
D = copy(df_na)
173-
mu_check = [mean(D[colname]) for colname in names(D)[1:2]]
173+
mu_check = [mean(D[!, colname]) for colname in names(D)[1:2]]
174174
if VERSION >= v"0.7.0-DEV.2035"
175-
sigma_check = [std(D[colname]) for colname in names(D)[1:2]]
175+
sigma_check = [std(D[!, colname]) for colname in names(D)[1:2]]
176176
mu, sigma = rescale!(D, [:A, :B], mu_check, sigma_check)
177177
end
178178
#= @test ismissing(D[1, :A]) =#

0 commit comments

Comments
 (0)