Skip to content

Commit 0f56276

Browse files
authored
Add DropExtrema transform (#201)
* Add 'DropExtrema' transform * Add tests * Add to docs * Fix typo * Update test/transforms.jl
1 parent 71cda24 commit 0f56276

File tree

6 files changed

+116
-0
lines changed

6 files changed

+116
-0
lines changed

docs/src/transforms.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ Filter
5050
DropMissing
5151
```
5252

53+
## DropExtrema
54+
55+
```@docs
56+
DropExtrema
57+
```
58+
5359
## Map
5460

5561
```@docs

src/TableTransforms.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export
5353
Sample,
5454
Filter,
5555
DropMissing,
56+
DropExtrema,
5657
Map,
5758
Replace,
5859
Coalesce,

src/transforms.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ include("transforms/stdnames.jl")
270270
include("transforms/sort.jl")
271271
include("transforms/sample.jl")
272272
include("transforms/filter.jl")
273+
include("transforms/dropextrema.jl")
273274
include("transforms/map.jl")
274275
include("transforms/replace.jl")
275276
include("transforms/coalesce.jl")

src/transforms/dropextrema.jl

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
DropExtrema(col; low=0.25, high=0.75)
7+
8+
Drops the rows where the values in the column `col` are outside the interval
9+
`[quantile(col, low), quantile(col, high)]`.
10+
11+
# Examples
12+
13+
```julia
14+
DropExtrema(1)
15+
DropExtrema(:a, low=0.2, high=0.8)
16+
DropExtrema("a", low=0.3, high=0.7)
17+
```
18+
"""
19+
struct DropExtrema{S<:ColSpec,T} <: StatelessFeatureTransform
  # column specification produced by `colspec(col)`
  colspec::S
  # lower quantile probability
  low::T
  # upper quantile probability
  high::T

  # Inner constructor: validates the probabilities and normalizes `col`
  # into a column specification. Throws `AssertionError("invalid quantiles")`
  # unless `0 ≤ low ≤ high ≤ 1`.
  function DropExtrema(col::Col, low::T, high::T) where {T}
    # Explicit throw (instead of `@assert`) keeps the exception type and
    # message callers rely on, while guaranteeing that the check is never
    # elided when assertions are disabled.
    0 ≤ low ≤ high ≤ 1 || throw(AssertionError("invalid quantiles"))
    cs = colspec(col)
    new{typeof(cs),T}(cs, low, high)
  end
end
30+
31+
# Positional form for probabilities of different types: promote both to a
# common type and forward to the typed constructor.
function DropExtrema(col::Col, low, high)
  return DropExtrema(col, promote(low, high)...)
end

# Keyword form: the probabilities default to 0.25 and 0.75.
function DropExtrema(col::Col; low=0.25, high=0.75)
  return DropExtrema(col, low, high)
end
33+
34+
# DropExtrema is declared revertible.
function isrevertible(::Type{<:DropExtrema})
  return true
end
35+
36+
# Build the row filter that implements `transform` for `table`.
#
# The first column name selected by the transform's column specification is
# used to compute the quantile bounds `xl` and `xh`; rows are kept when their
# value in that column lies in the closed interval `[xl, xh]`.
#
# Returns the tuple `(ftrans, fprep)`: the `Filter` transform and the result
# of preprocessing it on `table`.
function preprocess(transform::DropExtrema, table)
  cols = Tables.columns(table)
  names = Tables.columnnames(cols)
  sname = choose(transform.colspec, names) |> first

  x = Tables.getcolumn(cols, sname)
  T = eltype(x)
  # A probability such as 0.25 cannot be represented in an integer element
  # type (`convert(Int, 0.25)` throws `InexactError`), so integer columns
  # receive the probabilities unchanged. Every other element type keeps the
  # conversion to the column's element type.
  low, high = if T <: Integer
    (transform.low, transform.high)
  else
    (convert(T, transform.low), convert(T, transform.high))
  end
  xl, xh = quantile(x, (low, high))

  # keep only the rows whose value lies within the quantile bounds
  ftrans = Filter(row -> xl ≤ row[sname] ≤ xh)
  fprep = preprocess(ftrans, table)
  ftrans, fprep
end
50+
51+
# Apply the prepared filter to `feat`.
#
# `prep` is the `(filter, filter preprocessing)` pair. Returns the filtered
# features together with a cache `(filter, filter cache)` that `revertfeat`
# consumes.
function applyfeat(::DropExtrema, feat, prep)
  filt, filtprep = prep
  filtered, filtcache = applyfeat(filt, feat, filtprep)
  return filtered, (filt, filtcache)
end
56+
57+
# Revert the transform by delegating to the stored filter.
#
# `fcache` is the `(filter, filter cache)` pair; the result is whatever the
# filter's own `revertfeat` returns for `newfeat`.
function revertfeat(::DropExtrema, newfeat, fcache)
  filt, filtcache = fcache
  return revertfeat(filt, newfeat, filtcache)
end

test/transforms.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ transformfiles = [
55
"sort.jl",
66
"sample.jl",
77
"filter.jl",
8+
"dropextrema.jl",
89
"map.jl",
910
"replace.jl",
1011
"coalesce.jl",

test/transforms/dropextrema.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
@testset "DropExtrema" begin
  @test isrevertible(DropExtrema(:a))

  # fixture: six float columns with ten rows each
  a = [6.9, 9.0, 7.8, 0.0, 5.1, 4.8, 1.1, 8.0, 5.4, 7.9]
  b = [7.7, 4.2, 6.3, 1.4, 4.4, 0.5, 3.0, 6.1, 1.9, 1.5]
  c = [6.1, 7.7, 5.7, 2.8, 2.8, 6.7, 8.4, 5.0, 8.9, 1.0]
  d = [1.0, 2.8, 6.2, 1.9, 8.1, 6.2, 4.0, 6.9, 4.1, 1.4]
  e = [1.5, 8.9, 4.1, 1.6, 5.9, 1.3, 4.9, 3.5, 2.4, 6.3]
  f = [1.9, 2.1, 9.0, 6.2, 1.3, 8.9, 6.2, 3.8, 5.1, 2.3]
  t = Table(; a, b, c, d, e, f)

  # column selected by index, default quantiles
  trans = DropExtrema(1)
  tnew, cache = apply(trans, t)
  @test tnew.a == [6.9, 7.8, 5.1, 5.4]
  @test tnew.b == [7.7, 6.3, 4.4, 1.9]
  @test tnew.c == [6.1, 5.7, 2.8, 8.9]
  @test tnew.d == [1.0, 6.2, 8.1, 4.1]
  @test tnew.e == [1.5, 4.1, 5.9, 2.4]
  @test tnew.f == [1.9, 9.0, 1.3, 5.1]
  torig = revert(trans, tnew, cache)
  @test t == torig

  # column selected by symbol, custom quantiles
  trans = DropExtrema(:c, low=0.3, high=0.7)
  tnew, cache = apply(trans, t)
  @test tnew.a == [6.9, 7.8, 4.8, 8.0]
  @test tnew.b == [7.7, 6.3, 0.5, 6.1]
  @test tnew.c == [6.1, 5.7, 6.7, 5.0]
  @test tnew.d == [1.0, 6.2, 6.2, 6.9]
  @test tnew.e == [1.5, 4.1, 1.3, 3.5]
  @test tnew.f == [1.9, 9.0, 8.9, 3.8]
  torig = revert(trans, tnew, cache)
  @test t == torig

  # column selected by string, custom quantiles
  trans = DropExtrema("e", low=0.2, high=0.8)
  tnew, cache = apply(trans, t)
  @test tnew.a == [7.8, 0.0, 5.1, 1.1, 8.0, 5.4]
  @test tnew.b == [6.3, 1.4, 4.4, 3.0, 6.1, 1.9]
  @test tnew.c == [5.7, 2.8, 2.8, 8.4, 5.0, 8.9]
  @test tnew.d == [6.2, 1.9, 8.1, 4.0, 6.9, 4.1]
  @test tnew.e == [4.1, 1.6, 5.9, 4.9, 3.5, 2.4]
  @test tnew.f == [9.0, 6.2, 1.3, 6.2, 3.8, 5.1]
  torig = revert(trans, tnew, cache)
  @test t == torig

  # throws
  @test_throws AssertionError DropExtrema(:a, low=0, high=1.4)
end

0 commit comments

Comments
 (0)