Skip to content

Commit b607ffa

Browse files
committed
Update the UCB parameter update to use a Sherman-Morrison rank-one update
1 parent e4930cd commit b607ffa

File tree

2 files changed

+93
-39
lines changed

2 files changed

+93
-39
lines changed

ddopai/agents/dynamic_pricing/UCB.py

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -80,22 +80,29 @@ def fit(self, X, Y, action):
8080
self.Y = np.vstack([self.Y, Y])
8181
self.parameter_update()
8282

83-
def parameter_update(self):
84-
if self.X.shape[0] < 2:
85-
return
86-
def loss(theta):
87-
preds = self.g.g(self.X @ theta)
88-
errors = preds - self.Y.flatten()
89-
weights = 1 / self.g.v(preds)
90-
return np.sum((errors**2) * weights) + self.reg * np.linalg.norm(theta)**2
91-
92-
theta0 = np.concatenate([self.alpha, self.beta])
93-
res = minimize(loss, theta0, method='L-BFGS-B')
94-
if res.success:
95-
theta_hat = res.x
96-
d = self.environment_info.observation_space['features'].shape[0]
97-
self.alpha = theta_hat[:d]
98-
self.beta = theta_hat[d:]
83+
def parameter_update(self, z, D_t):
84+
"""
85+
One-step Sherman-Morrison update of the quasi-MLE for the *identity* link g(u)=u
86+
(linear demand). For a general link g, replace D_t by the corresponding *score* term (not shown here).
87+
"""
88+
if self.t == 0:
89+
# first call: initialise
90+
d = len(z)
91+
self.M_inv = np.eye(d) / self.lam # (λI)^{-1}
92+
self.q = np.zeros(d)
93+
94+
# rank-1 update of M_t^{-1}
95+
Mz = self.M_inv @ z
96+
self.M_inv -= np.outer(Mz, Mz) / (1.0 + z @ Mz)
97+
98+
# running first-order term
99+
self.q += z * D_t
100+
101+
# new parameter
102+
theta_hat = self.M_inv @ self.q
103+
d = theta_hat.size // 2
104+
self.alpha, self.beta = theta_hat[:d], theta_hat[d:]
105+
99106

100107
def sample_design_matrix(self):
101108
d = self.environment_info.observation_space['features'].shape[0]
@@ -113,11 +120,31 @@ def compute_uncertainty_M(self, x_t):
113120
])
114121
return np.linalg.inv(block_matrix @ np.linalg.inv(M) @ block_matrix.T)
115122

116-
def sample_from_confidence_region(self, theta_hat, M, N=50):
123+
def sample_from_confidence_region(self, theta_hat, M, N=50, gamma=None):
124+
"""
125+
Draw N points uniformly at random from
126+
{theta : (theta - theta_hat)^T M (theta - theta_hat) <= gamma}
127+
"""
128+
d = len(theta_hat) # here d = 2·feature_dim
129+
if gamma is None:
130+
# Simple hard-coded radius like the authors’ demo: Γ = d / 20
131+
# In production you would compute the analytic β_t²
132+
gamma = d / 20.0
133+
134+
# Cholesky factor of M^{-1}
117135
L = np.linalg.cholesky(np.linalg.inv(M))
118-
u = np.random.randn(len(theta_hat), N)
119-
u /= np.linalg.norm(u, axis=0)
120-
return (theta_hat[:, np.newaxis] + (1 / self.environment_info.observation_space['features'].shape[0]) * (L @ u)).T
136+
137+
# 1. Draw points *in* the unit ball (not only on the surface)
138+
rng = np.random.default_rng()
139+
u = rng.normal(size=(d, N))
140+
u /= np.linalg.norm(u, axis=0) # on the sphere
141+
r = rng.random(N)**(1.0 / d) # radii ∼ U[0,1]^{1/d}
142+
u *= r # now in the ball
143+
144+
# 2. Map ball → ellipsoid and 3. translate by theta_hat
145+
samples = theta_hat[:, None] + np.sqrt(gamma) * (L @ u)
146+
return samples.T # shape (N, d)
147+
121148

122149
def max_rev(self, samples, x):
123150
max_val = -np.inf

nbs/30_agents/42_DP_agents/13_UCB_agent.ipynb

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -117,22 +117,29 @@
117117
" self.Y = np.vstack([self.Y, Y])\n",
118118
" self.parameter_update()\n",
119119
"\n",
120-
" def parameter_update(self):\n",
121-
" if self.X.shape[0] < 2:\n",
122-
" return\n",
123-
" def loss(theta):\n",
124-
" preds = self.g.g(self.X @ theta)\n",
125-
" errors = preds - self.Y.flatten()\n",
126-
" weights = 1 / self.g.v(preds)\n",
127-
" return np.sum((errors**2) * weights) + self.reg * np.linalg.norm(theta)**2\n",
120+
" def parameter_update(self, z, D_t):\n",
121+
" \"\"\"\n",
122+
" One-step Sherman-Morrison update of the quasi-MLE for the *identity* link g(u)=u\n",
123+
" (linear demand). For a general link g, replace D_t by the corresponding *score* term (not shown here).\n",
124+
" \"\"\"\n",
125+
" if self.t == 0:\n",
126+
" # first call: initialise\n",
127+
" d = len(z)\n",
128+
" self.M_inv = np.eye(d) / self.lam # (λI)^{-1}\n",
129+
" self.q = np.zeros(d)\n",
130+
"\n",
131+
" # rank-1 update of M_t^{-1}\n",
132+
" Mz = self.M_inv @ z\n",
133+
" self.M_inv -= np.outer(Mz, Mz) / (1.0 + z @ Mz)\n",
134+
"\n",
135+
" # running first-order term\n",
136+
" self.q += z * D_t\n",
137+
"\n",
138+
" # new parameter\n",
139+
" theta_hat = self.M_inv @ self.q\n",
140+
" d = theta_hat.size // 2\n",
141+
" self.alpha, self.beta = theta_hat[:d], theta_hat[d:]\n",
128142
"\n",
129-
" theta0 = np.concatenate([self.alpha, self.beta])\n",
130-
" res = minimize(loss, theta0, method='L-BFGS-B')\n",
131-
" if res.success:\n",
132-
" theta_hat = res.x\n",
133-
" d = self.environment_info.observation_space['features'].shape[0]\n",
134-
" self.alpha = theta_hat[:d]\n",
135-
" self.beta = theta_hat[d:]\n",
136143
"\n",
137144
" def sample_design_matrix(self):\n",
138145
" d = self.environment_info.observation_space['features'].shape[0]\n",
@@ -150,11 +157,31 @@
150157
" ])\n",
151158
" return np.linalg.inv(block_matrix @ np.linalg.inv(M) @ block_matrix.T)\n",
152159
"\n",
153-
" def sample_from_confidence_region(self, theta_hat, M, N=50):\n",
160+
" def sample_from_confidence_region(self, theta_hat, M, N=50, gamma=None):\n",
161+
" \"\"\"\n",
162+
" Draw N points uniformly at random from\n",
163+
" {theta : (theta - theta_hat)^T M (theta - theta_hat) <= gamma}\n",
164+
" \"\"\"\n",
165+
" d = len(theta_hat) # here d = 2·feature_dim\n",
166+
" if gamma is None:\n",
167+
" # Simple hard-coded radius like the authors’ demo: Γ = d / 20\n",
168+
" # In production you would compute the analytic β_t²\n",
169+
" gamma = d / 20.0\n",
170+
"\n",
171+
" # Cholesky factor of M^{-1}\n",
154172
" L = np.linalg.cholesky(np.linalg.inv(M))\n",
155-
" u = np.random.randn(len(theta_hat), N)\n",
156-
" u /= np.linalg.norm(u, axis=0)\n",
157-
" return (theta_hat[:, np.newaxis] + (1 / self.environment_info.observation_space['features'].shape[0]) * (L @ u)).T\n",
173+
"\n",
174+
" # 1. Draw points *in* the unit ball (not only on the surface)\n",
175+
" rng = np.random.default_rng()\n",
176+
" u = rng.normal(size=(d, N))\n",
177+
" u /= np.linalg.norm(u, axis=0) # on the sphere\n",
178+
" r = rng.random(N)**(1.0 / d) # radii ∼ U[0,1]^{1/d}\n",
179+
" u *= r # now in the ball\n",
180+
"\n",
181+
" # 2. Map ball → ellipsoid and 3. translate by theta_hat\n",
182+
" samples = theta_hat[:, None] + np.sqrt(gamma) * (L @ u)\n",
183+
" return samples.T # shape (N, d)\n",
184+
"\n",
158185
"\n",
159186
" def max_rev(self, samples, x):\n",
160187
" max_val = -np.inf\n",

0 commit comments

Comments
 (0)