Switch the UCB parameter update to a Sherman-Morrison rank-one update

miTTimmiTTim · miTTimmiTTim · commit b607ffa8b93c · 2025-07-03T12:19:25.000+02:00
diff --git a/ddopai/agents/dynamic_pricing/UCB.py b/ddopai/agents/dynamic_pricing/UCB.py
@@ -80,22 +80,29 @@ def fit(self, X, Y, action):
         self.Y = np.vstack([self.Y, Y])
         self.parameter_update()
 
-    def parameter_update(self):
-        if self.X.shape[0] < 2:
-            return
-        def loss(theta):
-            preds = self.g.g(self.X @ theta)
-            errors = preds - self.Y.flatten()
-            weights = 1 / self.g.v(preds)
-            return np.sum((errors**2) * weights) + self.reg * np.linalg.norm(theta)**2
-
-        theta0 = np.concatenate([self.alpha, self.beta])
-        res = minimize(loss, theta0, method='L-BFGS-B')
-        if res.success:
-            theta_hat = res.x
-            d = self.environment_info.observation_space['features'].shape[0]
-            self.alpha = theta_hat[:d]
-            self.beta = theta_hat[d:]
+    def parameter_update(self, z, D_t):
+        """
+        One-step Sherman-Morrison update of the quasi-MLE for the *identity* link g(u)=u
+        (linear demand).  For a general link g, replace D_t by the corresponding *score* term.
+        """
+        if self.t == 0:
+            # first call: initialise
+            d = len(z)
+            self.M_inv = np.eye(d) / self.lam          # (λI)^{-1}
+            self.q = np.zeros(d)
+
+        # rank-1 update of M_t^{-1}
+        Mz = self.M_inv @ z
+        self.M_inv -= np.outer(Mz, Mz) / (1.0 + z @ Mz)
+
+        # running first-order term
+        self.q += z * D_t
+
+        # new parameter
+        theta_hat = self.M_inv @ self.q
+        d = theta_hat.size // 2
+        self.alpha, self.beta = theta_hat[:d], theta_hat[d:]
+
 
     def sample_design_matrix(self):
         d = self.environment_info.observation_space['features'].shape[0]
@@ -113,11 +120,31 @@ def compute_uncertainty_M(self, x_t):
         ])
         return np.linalg.inv(block_matrix @ np.linalg.inv(M) @ block_matrix.T)
 
-    def sample_from_confidence_region(self, theta_hat, M, N=50):
+    def sample_from_confidence_region(self, theta_hat, M, N=50, gamma=None):
+        """
+        Draw N points uniformly at random from
+            {theta : (theta - theta_hat)^T M (theta - theta_hat) <= gamma}
+        """
+        d = len(theta_hat)                        # here d = 2·feature_dim
+        if gamma is None:
+            # Simple hard-coded radius like the authors’ demo: Γ = d / 20
+            # In production you would compute the analytic β_t²
+            gamma = d / 20.0
+
+        # Cholesky factor of M^{-1}
         L = np.linalg.cholesky(np.linalg.inv(M))
-        u = np.random.randn(len(theta_hat), N)
-        u /= np.linalg.norm(u, axis=0)
-        return (theta_hat[:, np.newaxis] + (1 / self.environment_info.observation_space['features'].shape[0]) * (L @ u)).T
+
+        # 1. Draw points *in* the unit ball (not only on the surface)
+        rng = np.random.default_rng()
+        u = rng.normal(size=(d, N))
+        u /= np.linalg.norm(u, axis=0)            # on the sphere
+        r = rng.random(N)**(1.0 / d)              # radii ∼ U[0,1]^{1/d}
+        u *= r                                    # now in the ball
+
+        # 2. Map ball → ellipsoid and 3. translate by theta_hat
+        samples = theta_hat[:, None] + np.sqrt(gamma) * (L @ u)
+        return samples.T                          # shape (N, d)
+
 
     def max_rev(self, samples, x):
         max_val = -np.inf
diff --git a/nbs/30_agents/42_DP_agents/13_UCB_agent.ipynb b/nbs/30_agents/42_DP_agents/13_UCB_agent.ipynb
@@ -117,22 +117,29 @@
     "        self.Y = np.vstack([self.Y, Y])\n",
     "        self.parameter_update()\n",
     "\n",
-    "    def parameter_update(self):\n",
-    "        if self.X.shape[0] < 2:\n",
-    "            return\n",
-    "        def loss(theta):\n",
-    "            preds = self.g.g(self.X @ theta)\n",
-    "            errors = preds - self.Y.flatten()\n",
-    "            weights = 1 / self.g.v(preds)\n",
-    "            return np.sum((errors**2) * weights) + self.reg * np.linalg.norm(theta)**2\n",
+    "    def parameter_update(self, z, D_t):\n",
+    "        \"\"\"\n",
+    "        One-step Sherman-Morrison update of the quasi-MLE for the *identity* link g(u)=u\n",
+    "        (linear demand).  For a general link g, replace D_t by the corresponding *score* term.\n",
+    "        \"\"\"\n",
+    "        if self.t == 0:\n",
+    "            # first call: initialise\n",
+    "            d = len(z)\n",
+    "            self.M_inv = np.eye(d) / self.lam          # (λI)^{-1}\n",
+    "            self.q = np.zeros(d)\n",
+    "\n",
+    "        # rank-1 update of M_t^{-1}\n",
+    "        Mz = self.M_inv @ z\n",
+    "        self.M_inv -= np.outer(Mz, Mz) / (1.0 + z @ Mz)\n",
+    "\n",
+    "        # running first-order term\n",
+    "        self.q += z * D_t\n",
+    "\n",
+    "        # new parameter\n",
+    "        theta_hat = self.M_inv @ self.q\n",
+    "        d = theta_hat.size // 2\n",
+    "        self.alpha, self.beta = theta_hat[:d], theta_hat[d:]\n",
     "\n",
-    "        theta0 = np.concatenate([self.alpha, self.beta])\n",
-    "        res = minimize(loss, theta0, method='L-BFGS-B')\n",
-    "        if res.success:\n",
-    "            theta_hat = res.x\n",
-    "            d = self.environment_info.observation_space['features'].shape[0]\n",
-    "            self.alpha = theta_hat[:d]\n",
-    "            self.beta = theta_hat[d:]\n",
     "\n",
     "    def sample_design_matrix(self):\n",
     "        d = self.environment_info.observation_space['features'].shape[0]\n",
@@ -150,11 +157,31 @@
     "        ])\n",
     "        return np.linalg.inv(block_matrix @ np.linalg.inv(M) @ block_matrix.T)\n",
     "\n",
-    "    def sample_from_confidence_region(self, theta_hat, M, N=50):\n",
+    "    def sample_from_confidence_region(self, theta_hat, M, N=50, gamma=None):\n",
+    "        \"\"\"\n",
+    "        Draw N points uniformly at random from\n",
+    "            {theta : (theta - theta_hat)^T M (theta - theta_hat) <= gamma}\n",
+    "        \"\"\"\n",
+    "        d = len(theta_hat)                        # here d = 2·feature_dim\n",
+    "        if gamma is None:\n",
+    "            # Simple hard-coded radius like the authors’ demo: Γ = d / 20\n",
+    "            # In production you would compute the analytic β_t²\n",
+    "            gamma = d / 20.0\n",
+    "\n",
+    "        # Cholesky factor of M^{-1}\n",
     "        L = np.linalg.cholesky(np.linalg.inv(M))\n",
-    "        u = np.random.randn(len(theta_hat), N)\n",
-    "        u /= np.linalg.norm(u, axis=0)\n",
-    "        return (theta_hat[:, np.newaxis] + (1 / self.environment_info.observation_space['features'].shape[0]) * (L @ u)).T\n",
+    "\n",
+    "        # 1. Draw points *in* the unit ball (not only on the surface)\n",
+    "        rng = np.random.default_rng()\n",
+    "        u = rng.normal(size=(d, N))\n",
+    "        u /= np.linalg.norm(u, axis=0)            # on the sphere\n",
+    "        r = rng.random(N)**(1.0 / d)              # radii ∼ U[0,1]^{1/d}\n",
+    "        u *= r                                    # now in the ball\n",
+    "\n",
+    "        # 2. Map ball → ellipsoid and 3. translate by theta_hat\n",
+    "        samples = theta_hat[:, None] + np.sqrt(gamma) * (L @ u)\n",
+    "        return samples.T                          # shape (N, d)\n",
+    "\n",
     "\n",
     "    def max_rev(self, samples, x):\n",
     "        max_val = -np.inf\n",