|
117 | 117 | " self.Y = np.vstack([self.Y, Y])\n", |
118 | 118 | " self.parameter_update()\n", |
119 | 119 | "\n", |
120 | | - " def parameter_update(self):\n", |
121 | | - " if self.X.shape[0] < 2:\n", |
122 | | - " return\n", |
123 | | - " def loss(theta):\n", |
124 | | - " preds = self.g.g(self.X @ theta)\n", |
125 | | - " errors = preds - self.Y.flatten()\n", |
126 | | - " weights = 1 / self.g.v(preds)\n", |
127 | | - " return np.sum((errors**2) * weights) + self.reg * np.linalg.norm(theta)**2\n", |
| 120 | + " def parameter_update(self, z, D_t):\n", |
| 121 | + " \"\"\"\n", |
| 122 | + " One-step Sherman-Morrison update of the quasi-MLE for the *identity* link g(u)=u\n", |
| 123 | + "        (linear demand).  If you keep a general g, replace D_t by the corresponding score term.\n", |
| 124 | + " \"\"\"\n", |
| 125 | + " if self.t == 0:\n", |
| 126 | + " # first call: initialise\n", |
| 127 | + " d = len(z)\n", |
| 128 | + " self.M_inv = np.eye(d) / self.lam # (λI)^{-1}\n", |
| 129 | + " self.q = np.zeros(d)\n", |
| 130 | + "\n", |
| 131 | + " # rank-1 update of M_t^{-1}\n", |
| 132 | + " Mz = self.M_inv @ z\n", |
| 133 | + " self.M_inv -= np.outer(Mz, Mz) / (1.0 + z @ Mz)\n", |
| 134 | + "\n", |
| 135 | + " # running first-order term\n", |
| 136 | + " self.q += z * D_t\n", |
| 137 | + "\n", |
| 138 | + " # new parameter\n", |
| 139 | + " theta_hat = self.M_inv @ self.q\n", |
| 140 | + " d = theta_hat.size // 2\n", |
| 141 | + " self.alpha, self.beta = theta_hat[:d], theta_hat[d:]\n", |
128 | 142 | "\n", |
129 | | - " theta0 = np.concatenate([self.alpha, self.beta])\n", |
130 | | - " res = minimize(loss, theta0, method='L-BFGS-B')\n", |
131 | | - " if res.success:\n", |
132 | | - " theta_hat = res.x\n", |
133 | | - " d = self.environment_info.observation_space['features'].shape[0]\n", |
134 | | - " self.alpha = theta_hat[:d]\n", |
135 | | - " self.beta = theta_hat[d:]\n", |
136 | 143 | "\n", |
137 | 144 | " def sample_design_matrix(self):\n", |
138 | 145 | " d = self.environment_info.observation_space['features'].shape[0]\n", |
|
150 | 157 | " ])\n", |
151 | 158 | " return np.linalg.inv(block_matrix @ np.linalg.inv(M) @ block_matrix.T)\n", |
152 | 159 | "\n", |
153 | | - " def sample_from_confidence_region(self, theta_hat, M, N=50):\n", |
| 160 | + " def sample_from_confidence_region(self, theta_hat, M, N=50, gamma=None):\n", |
| 161 | + " \"\"\"\n", |
| 162 | + " Draw N points uniformly at random from\n", |
| 163 | + "        {theta : (theta - theta_hat)^T M (theta - theta_hat) <= gamma}\n", |
| 164 | + " \"\"\"\n", |
| 165 | + " d = len(theta_hat) # here d = 2·feature_dim\n", |
| 166 | + " if gamma is None:\n", |
| 167 | + " # Simple hard-coded radius like the authors’ demo: Γ = d / 20\n", |
| 168 | + " # In production you would compute the analytic β_t²\n", |
| 169 | + " gamma = d / 20.0\n", |
| 170 | + "\n", |
| 171 | + " # Cholesky factor of M^{-1}\n", |
154 | 172 | " L = np.linalg.cholesky(np.linalg.inv(M))\n", |
155 | | - " u = np.random.randn(len(theta_hat), N)\n", |
156 | | - " u /= np.linalg.norm(u, axis=0)\n", |
157 | | - " return (theta_hat[:, np.newaxis] + (1 / self.environment_info.observation_space['features'].shape[0]) * (L @ u)).T\n", |
| 173 | + "\n", |
| 174 | + " # 1. Draw points *in* the unit ball (not only on the surface)\n", |
| 175 | + " rng = np.random.default_rng()\n", |
| 176 | + " u = rng.normal(size=(d, N))\n", |
| 177 | + " u /= np.linalg.norm(u, axis=0) # on the sphere\n", |
| 178 | + " r = rng.random(N)**(1.0 / d) # radii ∼ U[0,1]^{1/d}\n", |
| 179 | + " u *= r # now in the ball\n", |
| 180 | + "\n", |
| 181 | + " # 2. Map ball → ellipsoid and 3. translate by theta_hat\n", |
| 182 | + " samples = theta_hat[:, None] + np.sqrt(gamma) * (L @ u)\n", |
| 183 | + " return samples.T # shape (N, d)\n", |
| 184 | + "\n", |
158 | 185 | "\n", |
159 | 186 | " def max_rev(self, samples, x):\n", |
160 | 187 | " max_val = -np.inf\n", |
|
0 commit comments