# functions.py
from typing import Literal
from collections import namedtuple
from numpy.typing import NDArray
import numpy as np

# Activation and loss functions paired with their derivatives, retrievable by key.

###### Activation Functions
# pylint: disable=unused-argument
# Different backward functions need different data, so in order to present a common
# interface, some functions have unused parameters.

# String literal type containing the implemented activation functions
ActivationKey = Literal["relu", "sigmoid", "identity", "log_softmax"]
# Tuple type to store functions and their derivatives consistently
Fn = namedtuple("Fn", ["forward", "backward"])

# Parameters for activation functions:
#   a_curr: the preactivation vector for the current layer
#   h_curr: the postactivation vector for the current layer
#   g_next: the incoming upstream gradient, which for an activation function is the
#           loss-to-node gradient for the post-activation vector, g_h_curr.
#
#            Linear combination          Activation            Linear combination
#             W_curr @ h_prev            phi(a_curr)            W_next @ h_curr
#   h_prev   ---------------->  a_curr   ----------->  h_curr   ---------------->  a_next
#   g_h_prev <----------------  g_a_curr <-----------  g_h_curr <----------------  g_a_next
#
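
# A minimal illustrative sketch (numeric_grad_check is not part of the original
# module): a central-difference check for any forward/backward pair above. It treats
# L(a) = g . forward(a) as a scalar surrogate loss, so backward(a, forward(a), g)
# should approximate the returned estimate coordinate by coordinate.
def numeric_grad_check(fn: Fn, a: NDArray, g: NDArray, eps: float = 1e-6) -> NDArray:
    grad = np.zeros_like(a, dtype=float)
    for i in range(a.size):
        a_plus = a.astype(float)
        a_minus = a.astype(float)
        a_plus.flat[i] += eps
        a_minus.flat[i] -= eps
        # Central difference of the scalar surrogate loss g . forward(a)
        grad.flat[i] = (g @ fn.forward(a_plus) - g @ fn.forward(a_minus)) / (2 * eps)
    return grad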


def identity_forward(a_curr: NDArray) -> NDArray:
    return a_curr


def identity_backward(
    a_curr: NDArray,
    h_curr: NDArray,
    g_next: NDArray,
) -> NDArray:
    return g_next


def sigmoid_forward(a_curr: NDArray) -> NDArray:
    return 1 / (1 + np.exp(-a_curr))


def sigmoid_backward(
    a_curr: NDArray,
    h_curr: NDArray,
    g_next: NDArray,
) -> NDArray:
    # dphi/da = h * (1 - h) for the logistic sigmoid, reusing the cached h_curr
    return g_next * h_curr * (1 - h_curr)
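
# Usage sketch: the backward pass reuses the cached post-activation, so only h_curr
# is actually needed. For example:
#   a = np.array([0.0, 2.0])
#   h = sigmoid_forward(a)                # [0.5, 0.8808...]
#   sigmoid_backward(a, h, np.ones(2))    # h * (1 - h) = [0.25, 0.1050...]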


def relu_forward(a_curr: NDArray) -> NDArray:
    return np.where(a_curr > 0, a_curr, 0)


def relu_backward(
    a_curr: NDArray,
    h_curr: NDArray,
    g_next: NDArray,
) -> NDArray:
    # The gradient passes through only where the preactivation was positive
    return np.where(a_curr > 0, g_next, 0)
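
# Usage sketch: the incoming gradient is masked rather than scaled.
#   a = np.array([-1.0, 2.0])
#   relu_backward(a, relu_forward(a), np.array([5.0, 5.0]))  # [0., 5.]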


def log_softmax_forward(a_curr: NDArray) -> NDArray:
    # Subtract the maximum value for numerical stability
    a_shifted = a_curr - np.max(a_curr, axis=-1, keepdims=True)
    # Compute log-softmax
    log_probs = a_shifted - np.log(np.sum(np.exp(a_shifted), axis=-1, keepdims=True))
    return log_probs


def log_softmax_backward(
    a_curr: NDArray,
    h_curr: NDArray,
    g_next: NDArray,
) -> NDArray:
    # Recover softmax probabilities from the cached log-probabilities
    softmax_probs = np.exp(h_curr)
    # Jacobian-vector product: d(log_softmax_i)/d(a_j) = delta_ij - p_j, so
    # g_a_j = g_j - p_j * sum_i(g_i)
    grad_a_curr = g_next - softmax_probs * np.sum(g_next, axis=-1, keepdims=True)
    return grad_a_curr
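
# Sanity sketch: log-softmax is invariant to adding a constant to its input, so its
# input gradient always sums to zero.
#   a = np.array([1.0, 2.0, 3.0])
#   h = log_softmax_forward(a)
#   np.exp(h).sum()                                               # 1.0 (valid probabilities)
#   log_softmax_backward(a, h, np.array([0.3, -0.1, 0.5])).sum()  # ~0.0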


def get_activation_fn(
    key: ActivationKey,
):
    """
    Get an activation function (forward) and its derivative (backward) by key.

    Args:
        key (str): the activation function to look up; options are:
            - "relu"
            - "sigmoid"
            - "identity"
            - "log_softmax"

    Raises:
        ValueError: if the provided key is not one of the implemented activations.

    Returns:
        Fn: the forward function and its derivative (backward).
    """
    if key == "relu":
        return Fn(relu_forward, relu_backward)
    if key == "sigmoid":
        return Fn(sigmoid_forward, sigmoid_backward)
    if key == "identity":
        return Fn(identity_forward, identity_backward)
    if key == "log_softmax":
        return Fn(log_softmax_forward, log_softmax_backward)
    raise ValueError(f"Unrecognized activation key '{key}'")


##### Loss Functions
# Parameters for loss functions:
#   y_hat: output of the network
#   y: observed value to use in computing the loss

# String literal type containing the implemented loss functions
LossKey = Literal["nll", "mse", "log_example"]


# def mse_forward(o: float | NDArray, y: float | NDArray):
#     return (y - o) ** 2


def mse_forward(y_hat: float | NDArray, y: float | NDArray):
    return 0.5 * np.mean((y_hat - y) ** 2)


def mse_backward(y_hat: float | NDArray, y: float | NDArray):
    # Gradient of 0.5 * mean((y_hat - y) ** 2): the 0.5 cancels the power's 2 and
    # the mean contributes the 1/N factor
    return (y_hat - y) / np.size(y_hat)
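
# Consistency sketch: for y_hat = y + d the loss is 0.5 * mean(d ** 2), so the
# gradient is d / N, matching mse_backward.
#   y = np.zeros(4)
#   y_hat = np.array([0.4, -0.2, 0.0, 0.8])
#   mse_backward(y_hat, y)   # [0.1, -0.05, 0.0, 0.2]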


def nll_loss_forward(y_hat: NDArray, y: int) -> float:
    # y_hat holds log-probabilities (e.g. from log_softmax); y is the index of the
    # correct class. Extract the log probability of the correct class
    log_prob = y_hat[y]
    # Compute the negative log likelihood loss
    loss = -log_prob
    return loss


def nll_loss_backward(y_hat: NDArray, y: int) -> NDArray:
    # dL/d(y_hat_i) = -1 for the correct class and 0 elsewhere; chaining this
    # through log_softmax_backward yields the familiar softmax - one_hot delta
    grad = np.zeros_like(y_hat)
    grad[y] = -1.0
    return grad
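
# Chain sketch: feeding nll_loss_backward through log_softmax_backward recovers the
# familiar softmax-minus-one-hot gradient with respect to the logits.
#   a = np.array([0.5, 1.5, -0.5])
#   y = 1
#   y_hat = log_softmax_forward(a)
#   g_yhat = nll_loss_backward(y_hat, y)        # [0., -1., 0.]
#   log_softmax_backward(a, y_hat, g_yhat)      # softmax(a) - one_hot(y)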


# Note: this log_loss was just used to mimic the book example; it doesn't actually
# incorporate the observed value y, so it's probably not useful for our purposes.
def log_loss_example_forward(y_hat: float | NDArray, y: float | NDArray):
    return -np.log(y_hat)


def log_loss_example_backward(y_hat: float | NDArray, y: float | NDArray):
    return -1 / y_hat


def get_loss_fn(key: LossKey):
    """
    Get a loss function (forward) and its derivative (backward) by key.

    Options are "nll", "mse", and "log_example".
    """
    if key == "log_example":
        return Fn(log_loss_example_forward, log_loss_example_backward)
    if key == "mse":
        return Fn(mse_forward, mse_backward)
    if key == "nll":
        return Fn(nll_loss_forward, nll_loss_backward)
    raise ValueError(f"Unrecognized loss key '{key}'")


# Stashing this in case it's needed
# def log_loss_from_gd(o: NDArray, y: NDArray):
#     return np.log(1 + np.exp(-y * o))