Skip to content

Commit 9c5bfba

Browse files
committed
MNT improve readability of grad and change grad of minimize to grad of least_squares
1 parent ec54175 commit 9c5bfba

File tree

2 files changed

+102
-94
lines changed

2 files changed

+102
-94
lines changed

fastcan/narx.py

Lines changed: 95 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,10 @@ class NARX(MultiOutputMixin, RegressorMixin, BaseEstimator):
406406
>>> print_narx(narx)
407407
| yid | Term | Coef |
408408
=======================================
409-
| 0 | Intercept | 1.069 |
410-
| 0 | y_hat[k-1,0] | 0.478 |
411-
| 0 | X[k-2,0] | 0.716 |
412-
| 0 | X[k-1,0]*X[k-3,0] | 1.504 |
409+
| 0 | Intercept | 1.008 |
410+
| 0 | y_hat[k-1,0] | 0.498 |
411+
| 0 | X[k-2,0] | 0.701 |
412+
| 0 | X[k-1,0]*X[k-3,0] | 1.496 |
413413
"""
414414

415415
_parameter_constraints: dict = {
@@ -466,7 +466,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
466466
467467
**params : dict
468468
Keyword arguments passed to
469-
`scipy.optimize.minimize`.
469+
`scipy.optimize.least_squares`.
470470
471471
Returns
472472
-------
@@ -601,7 +601,7 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
601601
coef_init = check_array(
602602
coef_init,
603603
ensure_2d=False,
604-
dtype=np.float64,
604+
dtype=float,
605605
)
606606
if coef_init.shape[0] != n_coef_intercept:
607607
raise ValueError(
@@ -634,15 +634,15 @@ def fit(self, X, y, sample_weight=None, coef_init=None, **params):
634634
return self
635635

636636
@staticmethod
637-
def _evaluate_term(term_id, delay_id, X, y_hat, k):
637+
def _evaluate_term(feat_ids, delay_ids, X, y_hat, k):
638638
n_features_in = X.shape[1]
639639
term = 1
640-
for i, feat_id in enumerate(term_id):
640+
for i, feat_id in enumerate(feat_ids):
641641
if feat_id != -1:
642642
if feat_id < n_features_in:
643-
term *= X[k - delay_id[i], feat_id]
643+
term *= X[k - delay_ids[i], feat_id]
644644
else:
645-
term *= y_hat[k - delay_id[i], feat_id - n_features_in]
645+
term *= y_hat[k - delay_ids[i], feat_id - n_features_in]
646646
return term
647647

648648
@staticmethod
@@ -666,7 +666,7 @@ def _predict(
666666
at_init = True
667667
init_k = 0
668668
for k in range(n_samples):
669-
if ~np.all(np.isfinite(X[k])):
669+
if not np.all(np.isfinite(X[k])):
670670
at_init = True
671671
init_k = k + 1
672672
y_hat[k] = np.nan
@@ -687,85 +687,101 @@ def _predict(
687687

688688
@staticmethod
689689
def _get_cfd_ids(feat_ids, delay_ids, output_ids, n_features_in):
690-
n_y = np.max(output_ids) + 1 # number of output
691-
n_d = np.max(delay_ids) # max delay
692-
693-
n_c = feat_ids.shape[0] # number of coef
694-
# number of dy/dx, [dy0(k)/dx, dy1(k)/dx, dy0(k-1)/dx, dy1(k-1)/dx, ...]
695-
n_dydx = n_y * n_d
696-
c_ids = np.arange(n_c) # Coef index
697-
698-
# Coef ids, feature ids, delay ids
699-
# cfd_ids is n_y * n_dydx
700-
cfd_ids = [[[] for _ in range(n_dydx)] for _ in range(n_y)]
701-
for i in range(n_y):
702-
for j in range(n_dydx):
703-
# Get dy[y_j](k - d_j)/dx
704-
d_j = j // n_y + 1 # delay
705-
y_j = j % n_y + n_features_in # output index
706-
output_mask = output_ids == i
707-
terms = feat_ids[output_mask]
708-
delays = delay_ids[output_mask]
709-
c_id = c_ids[output_mask]
710-
for t, (term, delay) in enumerate(zip(terms, delays)):
711-
if np.any((y_j == term) & (d_j == delay)):
712-
a_ij = []
713-
for f, (feat, k) in enumerate(zip(term, delay)):
714-
if (feat == y_j) and (k == d_j):
715-
a_ij += [
716-
[c_id[t], np.delete(term, f), np.delete(delay, f)]
717-
]
718-
cfd_ids[i][j] += a_ij
690+
"""
691+
Get ids of CFD (Coef, Feature, and Delay) matrix to update dyn(k)/dx.
692+
Maps coefficients to their corresponding features and delays.
693+
"""
694+
n_outputs = np.max(output_ids) + 1
695+
max_delay = np.max(delay_ids)
696+
697+
# Initialize cfd_ids as a list of lists n_outputs * n_outputs * max_delay
698+
# axis-0 (i): [dy0(k)/dx, dy1(k)/dx, ..., dyn(k)/dx]
699+
# axis-1 (j): [dy0(k-d)/dx, dy1(k-d)/dx, ..., dyn(k-d)/dx]
700+
# axis-2 (d): [dyj(k-1)/dx, dyj(k-2)/dx, ..., dyj(k-max_delay)/dx]
701+
cfd_ids = [
702+
[[[] for _ in range(max_delay)] for _ in range(n_outputs)]
703+
for _ in range(n_outputs)
704+
]
705+
706+
for coef_id, (term_feat_ids, term_delay_ids) in enumerate(
707+
zip(feat_ids, delay_ids)
708+
):
709+
row_y_id = output_ids[coef_id]
710+
for var_id, (feat_id, delay_id) in enumerate(
711+
zip(term_feat_ids, term_delay_ids)
712+
):
713+
if feat_id >= n_features_in and delay_id > 0:
714+
col_y_id = feat_id - n_features_in
715+
cfd_ids[row_y_id][col_y_id][delay_id - 1].append(
716+
[
717+
coef_id,
718+
np.delete(term_feat_ids, var_id),
719+
np.delete(term_delay_ids, var_id),
720+
]
721+
)
722+
719723
return cfd_ids
720724

721725
@staticmethod
722726
def _update_cfd(X, y_hat, coef, cfd_ids, k):
723-
n_y = y_hat.shape[1]
724-
n_dydx = len(cfd_ids[0])
725-
cfd = np.zeros((n_y, n_dydx))
726-
for i in range(n_y):
727-
for j in range(n_dydx):
728-
if cfd_ids[i][j]:
729-
a_ij = 0
730-
for coef_id, term_id, delay_id in cfd_ids[i][j]:
731-
a_ij += coef[coef_id] * NARX._evaluate_term(
732-
term_id, delay_id, X, y_hat, k
727+
"""
728+
Updates CFD matrix based on the current state.
729+
"""
730+
n_outputs, max_delay = y_hat.shape[1], len(cfd_ids[0][0])
731+
cfd = np.zeros((n_outputs, n_outputs, max_delay))
732+
733+
for i, yi_ids in enumerate(cfd_ids):
734+
for j, yiyj in enumerate(yi_ids):
735+
for d, yiyjd in enumerate(yiyj):
736+
if yiyjd:
737+
cfd[i, j, d] = sum(
738+
coef[coef_id]
739+
* NARX._evaluate_term(feat_id, delay_id, X, y_hat, k)
740+
for coef_id, feat_id, delay_id in yiyjd
733741
)
734-
cfd[i, j] = a_ij
735742
return cfd
736743

737744
@staticmethod
738745
def _update_dydx(X, y_hat, coef, feat_ids, delay_ids, output_ids, cfd_ids):
739-
n_samples = X.shape[0]
740-
n_y = y_hat.shape[1]
746+
"""
747+
Computation of the Jacobian matrix dydx.
748+
749+
Returns
750+
-------
751+
dydx : ndarray of shape (n_samples, n_outputs, n_x)
752+
Jacobian matrix of the outputs with respect to coefficients and intercepts.
753+
"""
754+
n_samples, n_y = y_hat.shape
741755
max_delay = np.max(delay_ids)
742-
n_c = feat_ids.shape[0]
743-
n_x = n_c + n_y
744-
output_x_ids = np.r_[output_ids, np.arange(n_y)]
745-
if max_delay == 0:
746-
dydx = np.zeros((n_samples, n_x, n_y))
747-
else:
748-
dydx = np.zeros((n_samples, n_x, n_y * max_delay))
749-
for k in range(max_delay, n_samples):
750-
for i in range(n_x):
751-
if i < n_c:
752-
term = NARX._evaluate_term(feat_ids[i], delay_ids[i], X, y_hat, k)
753-
else:
754-
term = 1
756+
n_coefs = feat_ids.shape[0]
757+
n_x = n_coefs + n_y # Total number of coefficients and intercepts
758+
y_ids = np.r_[output_ids, np.arange(n_y)]
759+
x_ids = np.arange(n_x)
755760

756-
if ~np.isfinite(term):
757-
continue
758-
dydx[k, i, output_x_ids[i]] = term
759-
if max_delay != 0:
760-
cfd = NARX._update_cfd(X, y_hat, coef, cfd_ids, k)
761-
if ~np.all(np.isfinite(cfd)):
762-
continue
763-
dydx[k, i, :n_y] += cfd @ dydx[k - 1, i]
764-
dydx[k, i, n_y:] = dydx[k - 1, i, :-n_y]
761+
dydx = np.zeros((n_samples, n_y, n_x), dtype=float)
762+
for k in range(max_delay, n_samples):
763+
# Compute terms for time step k
764+
terms = np.ones(n_x, dtype=float)
765+
for j in range(n_coefs):
766+
terms[j] = NARX._evaluate_term(feat_ids[j], delay_ids[j], X, y_hat, k)
767+
if not np.all(np.isfinite(terms)):
768+
break
769+
770+
# Update constant terms of Jacobian
771+
dydx[k, y_ids, x_ids] = terms
772+
773+
# Update dynamic terms of Jacobian
774+
if max_delay > 0:
775+
cfd = NARX._update_cfd(X, y_hat, coef, cfd_ids, k)
776+
for d in range(max_delay):
777+
dydx[k] += cfd[:, :, d] @ dydx[k - d - 1]
778+
779+
# Handle divergence
765780
if np.any(dydx[k] > 1e20):
766781
dydx[k:] = 1e20
767-
return dydx[:, :, :n_y]
768-
return dydx[:, :, :n_y]
782+
break
783+
784+
return dydx
769785

770786
@staticmethod
771787
def _loss(
@@ -821,15 +837,11 @@ def _grad(
821837
mask_nomissing = _mask_missing_value(
822838
y, y_hat, sample_weight_sqrt, return_mask=True
823839
)
824-
y_masked = y[mask_nomissing]
825-
y_hat_masked = y_hat[mask_nomissing]
840+
826841
sample_weight_sqrt_masked = sample_weight_sqrt[mask_nomissing]
827842
dydx_masked = dydx[mask_nomissing]
828843

829-
e = y_hat_masked - y_masked
830-
return (e[:, np.newaxis, :] * dydx_masked).sum(
831-
axis=2
832-
) * sample_weight_sqrt_masked
844+
return dydx_masked.sum(axis=1) * sample_weight_sqrt_masked
833845

834846
@validate_params(
835847
{

tests/test_narx_jac.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,10 @@ def test_simple():
5151
output_ids=output_ids,
5252
)
5353

54-
55-
e1 = y_hat_1 - y
5654
grad_truth = np.array([
57-
np.sum(e1*np.array([0, y_hat_1[0, 0], y_hat_1[1, 0]+coef_1[0]]).reshape(-1, 1)),
58-
np.sum(e1*np.array([0, X[0, 0], X[0, 0]*coef_1[0]+X[0, 0]]).reshape(-1, 1)),
59-
np.sum(e1*np.array([0, 1, coef_1[0]+1]).reshape(-1, 1)),
55+
np.sum(np.array([0, y_hat_1[0, 0], y_hat_1[1, 0]+coef_1[0]]).reshape(-1, 1)),
56+
np.sum(np.array([0, X[0, 0], X[0, 0]*coef_1[0]+X[0, 0]]).reshape(-1, 1)),
57+
np.sum(np.array([0, 1, coef_1[0]+1]).reshape(-1, 1)),
6058
])
6159

6260

@@ -75,7 +73,6 @@ def test_simple():
7573

7674
assert_almost_equal(grad.sum(axis=0), grad_truth, decimal=4)
7775

78-
7976
def test_complex():
8077
"""Complex model"""
8178
# Simulated model
@@ -178,7 +175,7 @@ def test_complex():
178175
delay_ids=delay_ids,
179176
output_ids=output_ids,
180177
)
181-
loss_0 = 0.5*np.sum((y_hat_0 - y)**2)
178+
e_0 = y_hat_0 - y
182179

183180
delta_w = 0.00001
184181
for i in range(len(coef)+len(intercept)):
@@ -202,8 +199,7 @@ def test_complex():
202199
output_ids=output_ids,
203200
)
204201

205-
e1 = y_hat_1 - y
206-
loss_1 = 0.5*np.sum((e1)**2)
207-
grad_num = (loss_1 - loss_0) / delta_w
202+
e_1 = y_hat_1 - y
203+
grad_num = (e_1 - e_0).sum(axis=1) / delta_w
208204

209-
assert_allclose(grad.sum(axis=0)[i], grad_num, rtol=1e-1)
205+
assert_allclose(grad.sum(axis=0)[i], grad_num.sum(), rtol=1e-1)

0 commit comments

Comments
 (0)