add model training on synthetic data

VincentAuriau · VincentAuriau · commit 25858c2148cc · 2025-07-18T14:14:55.000+02:00
diff --git a/notebooks/synthetic_experiments.ipynb b/notebooks/synthetic_experiments.ipynb
@@ -25,9 +25,12 @@
    "source": [
     "import sys\n",
     "sys.path.append(\"../\")\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
     "\n",
     "import choice_learn\n",
-    "from python.data import SyntheticDataGenerator"
+    "from python.data import SyntheticDataGenerator\n",
+    "from choice_learn.basket_models import Trip, TripDataset"
    ]
   },
   {
@@ -68,25 +71,140 @@
     "dataset = data_gen.generate_dataset(n_baskets=1000)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51791e7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One Trip per generated basket; every trip gets an all-zero length-8 price vector and assortment=0.\n",
+    "trip_list = [\n",
+    "    Trip(purchases=basket, prices=np.zeros((8,)), assortment=0) for basket in dataset\n",
+    "]\n",
+    "trip_dataset = TripDataset(trips=trip_list, available_items=np.ones((1, 8)))"
+   ]
+  },
   {
    "cell_type": "markdown",
-   "id": "f337217b",
+   "id": "52b4b18c",
+   "metadata": {},
+   "source": [
+    "## Modelling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d6c32e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from choice_learn.basket_models import AleaCarta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ef517b6",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "### Sample purchased baskets\n",
+    "latent_sizes = {\"preferences\": 6, \"price\": 3, \"season\": 3}  # forwarded unchanged as AleaCarta(latent_sizes=...)\n",
+    "n_negative_samples = 2\n",
+    "optimizer = \"adam\"\n",
+    "lr = 1e-2  # forwarded as AleaCarta(lr=...)\n",
+    "epochs = 200\n",
+    "batch_size = 32\n",
     "\n",
-    "### Modelling\n",
+    "model = AleaCarta(\n",
+    "    item_intercept=False,  # item_intercept, price_effects and seasonal_effects are all set to False here\n",
+    "    price_effects=False,\n",
+    "    seasonal_effects=False,\n",
+    "    latent_sizes=latent_sizes,\n",
+    "    n_negative_samples=n_negative_samples,\n",
+    "    optimizer=optimizer,\n",
+    "    lr=lr,\n",
+    "    epochs=epochs,\n",
+    "    batch_size=batch_size,\n",
+    ")\n",
     "\n",
-    "### Results"
+    "model.instantiate(n_items=8, n_stores=2)  # n_items=8 matches the length-8 price/availability arrays used to build the trips"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ba1b8457",
+   "id": "2f8a915e",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "history = model.fit(trip_dataset)  # history['train_loss'] is plotted in the next cell"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1c78ef41",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(history[\"train_loss\"], label=\"Training loss\")  # labelled so plt.legend() has an entry to show\n",
+    "plt.xlabel(\"Epoch\")\n",
+    "plt.ylabel(\"Training Loss\")\n",
+    "plt.legend()\n",
+    "plt.title(\"Training of AleaCarta\")  # the model fitted above is AleaCarta\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f337217b",
+   "metadata": {},
+   "source": [
+    "## Results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e4008d65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib as mpl\n",
+    "\n",
+    "# Row i holds the model's probabilities for each item given a basket containing only item i;\n",
+    "# item i itself is excluded from the softmax, so the diagonal is zero and is drawn in white.\n",
+    "n_items = 8\n",
+    "item_ids = np.arange(n_items)\n",
+    "zeros = np.zeros(n_items, dtype=int)\n",
+    "res = []\n",
+    "for i in range(n_items):\n",
+    "    utilities = np.asarray(model.compute_batch_utility(\n",
+    "        item_batch=item_ids,\n",
+    "        basket_batch=np.full((n_items, 1), i),\n",
+    "        store_batch=zeros,\n",
+    "        week_batch=zeros,\n",
+    "        # NOTE(review): 6 price columns vs. 8 items; kept as-is, confirm prices are ignored here.\n",
+    "        price_batch=np.zeros((n_items, 6), dtype=int),\n",
+    "    ))\n",
+    "    keep = item_ids != i  # drop the item that is already in the basket\n",
+    "    weights = np.exp(utilities - np.max(utilities)) * keep  # max-shift for numerical stability\n",
+    "    res.append(weights / weights.sum())\n",
+    "\n",
+    "res = np.stack(res)\n",
+    "# Mask everything except the diagonal so the overlay only paints the diagonal cells.\n",
+    "mask = np.ma.masked_where(~np.eye(n_items, dtype=bool), res)\n",
+    "\n",
+    "fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))\n",
+    "axes.set_xticks([])\n",
+    "axes.set_yticks([])\n",
+    "im = axes.imshow(res, cmap=\"Spectral\", alpha=0.99, vmin=0, vmax=1)\n",
+    "axes.imshow(mask, cmap=mpl.colors.ListedColormap(['white']), alpha=1)\n",
+    "fig.colorbar(im, cax=fig.add_axes([0.92, 0.15, 0.02, 0.69]))\n",
+    "axes.set_title(\"Estimated Conditional Probabilities\");"
+   ]
   },
   {
    "cell_type": "markdown",