Skip to content

Commit 3d6e46b

Browse files
committed
Reorganize source files
1 parent 0f56276 commit 3d6e46b

File tree

6 files changed

+395
-390
lines changed

6 files changed

+395
-390
lines changed

src/transforms.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ include("transforms/stdnames.jl")
270270
include("transforms/sort.jl")
271271
include("transforms/sample.jl")
272272
include("transforms/filter.jl")
273+
include("transforms/dropmissing.jl")
273274
include("transforms/dropextrema.jl")
274275
include("transforms/map.jl")
275276
include("transforms/replace.jl")

src/transforms/dropmissing.jl

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# ------------------------------------------------------------------
2+
# Licensed under the MIT License. See LICENSE in the project root.
3+
# ------------------------------------------------------------------
4+
5+
"""
6+
DropMissing()
7+
DropMissing(:)
8+
9+
Drop all rows with missing values in table.
10+
11+
DropMissing(col₁, col₂, ..., colₙ)
12+
DropMissing([col₁, col₂, ..., colₙ])
13+
DropMissing((col₁, col₂, ..., colₙ))
14+
15+
Drop all rows with missing values in selected columns `col₁`, `col₂`, ..., `colₙ`.
16+
17+
DropMissing(regex)
18+
19+
Drop all rows with missing values in columns that match with `regex`.
20+
21+
# Examples
22+
23+
```julia
24+
DropMissing()
25+
DropMissing("b", "c", "e")
26+
DropMissing([2, 3, 5])
27+
DropMissing((:b, :c, :e))
28+
DropMissing(r"[bce]")
29+
```
30+
31+
## Notes
32+
33+
* The transform can alter the element type of columns from `Union{Missing,T}` to `T`.
34+
* If the transformed column has only `missing` values, it will be converted to an empty column of type `Any`.
35+
"""
36+
struct DropMissing{S<:ColSpec} <: StatelessFeatureTransform
37+
colspec::S
38+
end
39+
40+
DropMissing() = DropMissing(AllSpec())
41+
DropMissing(spec) = DropMissing(colspec(spec))
42+
DropMissing(cols::T...) where {T<:Col} = DropMissing(colspec(cols))
43+
44+
isrevertible(::Type{<:DropMissing}) = true
45+
46+
_ftrans(::DropMissing{AllSpec}, snames) = Filter(row -> all(!ismissing, row))
47+
_ftrans(::DropMissing, snames) = Filter(row -> all(!ismissing, row[nm] for nm in snames))
48+
49+
# nonmissing
50+
_nonmissing(::Type{T}, x) where {T} = x
51+
_nonmissing(::Type{Union{Missing,T}}, x) where {T} = collect(T, x)
52+
_nonmissing(::Type{Missing}, x) = []
53+
_nonmissing(x) = _nonmissing(eltype(x), x)
54+
55+
function preprocess(transform::DropMissing, table)
56+
schema = Tables.schema(table)
57+
names = schema.names
58+
snames = choose(transform.colspec, names)
59+
ftrans = _ftrans(transform, snames)
60+
fprep = preprocess(ftrans, table)
61+
ftrans, fprep, snames
62+
end
63+
64+
function applyfeat(::DropMissing, feat, prep)
65+
# apply filter transform
66+
ftrans, fprep, snames = prep
67+
newfeat, ffcache = applyfeat(ftrans, feat, fprep)
68+
69+
# drop Missing type
70+
cols = Tables.columns(newfeat)
71+
names = Tables.columnnames(cols)
72+
columns = map(names) do nm
73+
x = Tables.getcolumn(cols, nm)
74+
nm ∈ snames ? _nonmissing(x) : x
75+
end
76+
𝒯 = (; zip(names, columns)...)
77+
newfeat = 𝒯 |> Tables.materializer(feat)
78+
79+
# original column types
80+
types = Tables.schema(feat).types
81+
82+
newfeat, (ftrans, ffcache, types)
83+
end
84+
85+
function revertfeat(::DropMissing, newfeat, fcache)
86+
ftrans, ffcache, types = fcache
87+
88+
# reintroduce Missing type
89+
cols = Tables.columns(newfeat)
90+
names = Tables.columnnames(cols)
91+
columns = map(zip(types, names)) do (T, nm)
92+
x = Tables.getcolumn(cols, nm)
93+
collect(T, x)
94+
end
95+
𝒯 = (; zip(names, columns)...)
96+
ofeat = 𝒯 |> Tables.materializer(newfeat)
97+
98+
# revert filter transform
99+
revertfeat(ftrans, ofeat, ffcache)
100+
end

src/transforms/filter.jl

Lines changed: 5 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
# ------------------------------------------------------------------
44

55
"""
6-
Filter(func)
6+
Filter(pred)
77
8-
Filters the table returning only the rows where the `func` returns true.
8+
Filters the table returning only the rows where
9+
the predicate `pred` is `true`.
910
1011
# Examples
1112
@@ -23,7 +24,7 @@ Filter(row -> row["a"] == true && row["b"] < 30)
2324
* The schema of the table is preserved by the transform.
2425
"""
2526
struct Filter{F} <: StatelessFeatureTransform
26-
func::F
27+
pred::F
2728
end
2829

2930
isrevertible(::Type{<:Filter}) = true
@@ -35,7 +36,7 @@ function preprocess(transform::Filter, table)
3536
# selected indices
3637
sinds, nrows = Int[], 0
3738
for (i, row) in enumerate(rows)
38-
transform.func(row) && push!(sinds, i)
39+
transform.pred(row) && push!(sinds, i)
3940
nrows += 1
4041
end
4142

@@ -72,100 +73,3 @@ function revertfeat(::Filter, newfeat, fcache)
7273

7374
rows |> Tables.materializer(newfeat)
7475
end
75-
76-
"""
77-
DropMissing()
78-
DropMissing(:)
79-
80-
Drop all rows with missing values in table.
81-
82-
DropMissing(col₁, col₂, ..., colₙ)
83-
DropMissing([col₁, col₂, ..., colₙ])
84-
DropMissing((col₁, col₂, ..., colₙ))
85-
86-
Drop all rows with missing values in selected columns `col₁`, `col₂`, ..., `colₙ`.
87-
88-
DropMissing(regex)
89-
90-
Drop all rows with missing values in columns that match with `regex`.
91-
92-
# Examples
93-
94-
```julia
95-
DropMissing()
96-
DropMissing("b", "c", "e")
97-
DropMissing([2, 3, 5])
98-
DropMissing((:b, :c, :e))
99-
DropMissing(r"[bce]")
100-
```
101-
102-
## Notes
103-
104-
* The transform can alter the element type of columns from `Union{Missing,T}` to `T`.
105-
* If the transformed column has only `missing` values, it will be converted to an empty column of type `Any`.
106-
"""
107-
struct DropMissing{S<:ColSpec} <: StatelessFeatureTransform
108-
colspec::S
109-
end
110-
111-
DropMissing() = DropMissing(AllSpec())
112-
DropMissing(spec) = DropMissing(colspec(spec))
113-
DropMissing(cols::T...) where {T<:Col} = DropMissing(colspec(cols))
114-
115-
isrevertible(::Type{<:DropMissing}) = true
116-
117-
_ftrans(::DropMissing{AllSpec}, snames) = Filter(row -> all(!ismissing, row))
118-
_ftrans(::DropMissing, snames) = Filter(row -> all(!ismissing, row[nm] for nm in snames))
119-
120-
# nonmissing
121-
_nonmissing(::Type{T}, x) where {T} = x
122-
_nonmissing(::Type{Union{Missing,T}}, x) where {T} = collect(T, x)
123-
_nonmissing(::Type{Missing}, x) = []
124-
_nonmissing(x) = _nonmissing(eltype(x), x)
125-
126-
function preprocess(transform::DropMissing, table)
127-
schema = Tables.schema(table)
128-
names = schema.names
129-
snames = choose(transform.colspec, names)
130-
ftrans = _ftrans(transform, snames)
131-
fprep = preprocess(ftrans, table)
132-
ftrans, fprep, snames
133-
end
134-
135-
function applyfeat(::DropMissing, feat, prep)
136-
# apply filter transform
137-
ftrans, fprep, snames = prep
138-
newfeat, ffcache = applyfeat(ftrans, feat, fprep)
139-
140-
# drop Missing type
141-
cols = Tables.columns(newfeat)
142-
names = Tables.columnnames(cols)
143-
columns = map(names) do nm
144-
x = Tables.getcolumn(cols, nm)
145-
nm ∈ snames ? _nonmissing(x) : x
146-
end
147-
𝒯 = (; zip(names, columns)...)
148-
newfeat = 𝒯 |> Tables.materializer(feat)
149-
150-
# original column types
151-
types = Tables.schema(feat).types
152-
153-
newfeat, (ftrans, ffcache, types)
154-
end
155-
156-
function revertfeat(::DropMissing, newfeat, fcache)
157-
ftrans, ffcache, types = fcache
158-
159-
# reintroduce Missing type
160-
cols = Tables.columns(newfeat)
161-
names = Tables.columnnames(cols)
162-
columns = map(zip(types, names)) do (T, nm)
163-
x = Tables.getcolumn(cols, nm)
164-
collect(T, x)
165-
end
166-
𝒯 = (; zip(names, columns)...)
167-
ofeat = 𝒯 |> Tables.materializer(newfeat)
168-
169-
# revert filter transform
170-
revertfeat(ftrans, ofeat, ffcache)
171-
end

test/transforms.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ transformfiles = [
55
"sort.jl",
66
"sample.jl",
77
"filter.jl",
8+
"dropmissing.jl",
89
"dropextrema.jl",
910
"map.jl",
1011
"replace.jl",

0 commit comments

Comments
 (0)