1
1
# ------------------------------- #
2
2
# -- Logistic Regression (L2) -- #
3
3
# ------------------------------- #
4
- # -> f(θ) = -∑logσ(yXθ) + λ|θ|₂²
5
- # -> ∇f(θ) = - X'(yσ(-yXθ )) + λθ
6
- # -> ∇²f(θ) = X'(σ(yXθ)) X + λI
4
+ # -> f(θ) = -∑logσ(yXθ) + λ|θ|₂²/2
5
+ # -> ∇f(θ) = X'(y(w-1 )) + λθ
6
+ # -> ∇²f(θ) = X' Diag(w(1-w)) X + λI
7
7
# NOTE:
8
+ # * w = σ(yXθ)
8
9
# * yᵢ ∈ {±1} so that y² = 1
9
- # * -σ(-x) ==(σ(x)-1)
10
+ # * -σ(-x) == (σ(x)-1)
11
+ # NOTE: https://github.com/JuliaAI/MLJLinearModels.jl/issues/104
10
12
# ---------------------------------------------------------
11
13
12
14
function fgh! (glr:: GLR{LogisticLoss,<:L2R} , X, y, scratch)
@@ -17,12 +19,12 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
17
19
(f, g, H, θ) -> begin
18
20
Xθ = scratch. n
19
21
apply_X! (Xθ, X, θ) # -- Xθ = apply_X(X, θ)
20
- # precompute σ(yXθ) use -σ(-x) = (σ(x)-1)
21
- w = scratch. n2
22
- w .= σ .(Xθ .* y) # -- w = σ.(Xθ .* y)
22
+ # precompute σ(yXθ)
23
+ w = scratch. n2
24
+ w .= σ .(Xθ .* y) # -- w = σ.(Xθ .* y)
23
25
g === nothing || begin
24
26
t = scratch. n3
25
- t .= y .* ( w .- 1.0 ) # -- t = y .* (w .- 1.0)
27
+ t .= y .* w .- y # -- t = y .* (w .- 1.0)
26
28
apply_Xt! (g, X, t) # -- g = X't
27
29
g .+ = λ .* θ
28
30
glr. penalize_intercept || (g[end ] -= λ * θ[end ])
@@ -31,19 +33,21 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
31
33
# NOTE: we could try to be clever to reduce the allocations for
32
34
# ΛX but computing the full hessian allocates a lot anyway so
33
35
# probably not really worth it
34
- a = 1 : p
35
- ΛX = w .* X # !! big allocs
36
- mul! (view (H, a, a), X' , ΛX) # -- H[1:p,1:p] = X'ΛX
36
+ t = scratch. n3
37
+ t .= w .- w.^ 2 # σ(yXθ)(1-σ(yXθ))
38
+ a = 1 : p
39
+ ΛX = t .* X # !! big allocs
40
+ mul! (view (H, a, a), X' , ΛX) # -- H[1:p,1:p] = X'ΛX
37
41
ΛXt1 = view (scratch. p, a)
38
42
ΛXt1 .*= 0
39
43
@inbounds for i in a, j in 1 : n
40
- ΛXt1[i] += ΛX[j, i] # -- (ΛX)'1
44
+ ΛXt1[i] += ΛX[j, i] # -- (ΛX)'1
41
45
end
42
46
@inbounds for i in a
43
47
H[i, end ] = H[end , i] = ΛXt1[i] # -- H[:,p+1] = (ΛX)'1
44
48
end
45
- H[end , end ] = sum (w ) # -- 1'Λ1'
46
- add_λI! (H, λ, glr. penalize_intercept) # -- H = X'ΛX + λI
49
+ H[end , end ] = sum (t ) # -- 1'Λ1
50
+ add_λI! (H, λ, glr. penalize_intercept) # -- H = X'ΛX + λI
47
51
end
48
52
f === nothing || return J (y, Xθ, view_θ (glr, θ))
49
53
end
@@ -53,16 +57,18 @@ function fgh!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
53
57
(f, g, H, θ) -> begin
54
58
Xθ = scratch. n
55
59
apply_X! (Xθ, X, θ)
56
- w = scratch. n2
57
- w .= σ .(y .* Xθ)
60
+ w = scratch. n2
61
+ w .= σ .(y .* Xθ)
58
62
g === nothing || begin
59
63
t = scratch. n3
60
- t .= y .* ( w .- 1.0 )
64
+ t .= y .* w .- y
61
65
apply_Xt! (g, X, t)
62
66
g .+ = λ .* θ
63
67
end
64
68
H === nothing || begin
65
- mul! (H, X' , w .* X)
69
+ t = scratch. n3
70
+ t .= w .- w.^ 2
71
+ mul! (H, X' , t .* X)
66
72
add_λI! (H, λ)
67
73
end
68
74
f === nothing || return J (y, Xθ, θ)
@@ -80,8 +86,9 @@ function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
80
86
(Hv, θ, v) -> begin
81
87
Xθ = scratch. n
82
88
apply_X! (Xθ, X, θ) # -- Xθ = apply_X(X, θ)
83
- w = scratch. n2
84
- w .= σ .(Xθ .* y) # -- w = σ.(Xθ .* y)
89
+ w = scratch. n2
90
+ w .= σ .(Xθ .* y) # -- w = σ.(Xθ .* y)
91
+ w .- = w.^ 2 # -- w = w(1-w)
85
92
# view on the first p rows
86
93
a = 1 : p
87
94
Hvₐ = view (Hv, a)
@@ -103,9 +110,10 @@ function Hv!(glr::GLR{LogisticLoss,<:L2R}, X, y, scratch)
103
110
(Hv, θ, v) -> begin
104
111
Xθ = scratch. n
105
112
apply_X! (Xθ, X, θ)
106
- w = scratch. n2
107
- w .= σ .(Xθ .* y) # -- σ(yXθ)
108
- Xv = scratch. n
113
+ w = scratch. n2
114
+ w .= σ .(Xθ .* y) # -- σ(yXθ)
115
+ w .- = w.^ 2
116
+ Xv = scratch. n
109
117
mul! (Xv, X, v)
110
118
Xv .*= scratch. n2 # -- ΛXv
111
119
mul! (Hv, X' , Xv) # -- X'ΛXv
0 commit comments