Commit 72e872a

esantorella authored and facebook-github-bot committed
Set seed in Turbo tutorial + tiny improvements (meta-pytorch#1643)
Summary:

## Motivation

The graphs in meta-pytorch#1591 show that tutorials vary quite a bit from run to run in both runtime and memory usage, especially TuRBO. This makes it hard for us to spot improvements or degradations in performance. I also checked the information_theoretic_acquisition_functions, constraint_active_search, and composite_bo_with_hogp tutorials to see whether setting the seed would help, since those also seemed somewhat variable, but they appear to be (numerically) deterministic. I also attempted to take a look at the thompson_sampling tutorial, but it's just been making my laptop warm and loud.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

- [x] Set seed in TuRBO tutorial
- [x] Added a command-line argument for running a single tutorial by name
- [x] `composite_bo_with_hogp` tutorial: removed the 'maxiter' and 'disp' arguments from `fit_gpytorch_torch`, since they were being ignored anyway (Adam doesn't accept them); print instead of logging
- [ ] Run TuRBO without smoke test in order to make sure the inputs match the outputs again

Pull Request resolved: meta-pytorch#1643

Test Plan: Repeated Actions runs confirm that TuRBO is much more consistent in runtime and memory than before.

Reviewed By: Balandat

Differential Revision: D42723251

Pulled By: esantorella

fbshipit-source-id: b4209845f5e695066153bcceee75a80462854863
1 parent 2a4baa1 commit 72e872a

File tree

4 files changed: +29 −17 lines

- botorch/optim/optimize.py
- scripts/run_tutorials.py
- tutorials/composite_bo_with_hogp.ipynb
- tutorials/turbo_1.ipynb


botorch/optim/optimize.py

Lines changed: 0 additions & 1 deletion
@@ -45,7 +45,6 @@
     "sample_around_best",
     "sample_around_best_sigma",
     "sample_around_best_prob_perturb",
-    "sample_around_best_prob_perturb",
     "seed",
     "thinning",
 }
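
Aside on why this deletion is safe: the surrounding braces appear to be a set literal of recognized option names, and duplicates in a Python set literal are collapsed silently, so the repeated entry was redundant rather than harmful. A minimal sketch:

```python
# Duplicates in a set literal are deduplicated at construction time,
# so the repeated "sample_around_best_prob_perturb" entry had no effect.
keys = {
    "sample_around_best_prob_perturb",
    "sample_around_best_prob_perturb",  # duplicate: silently dropped
    "seed",
    "thinning",
}
print(len(keys))  # 3
```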

scripts/run_tutorials.py

Lines changed: 19 additions & 4 deletions
@@ -103,17 +103,24 @@ def run_tutorials(
     repo_dir: str,
     include_ignored: bool = False,
     smoke_test: bool = False,
+    name: Optional[str] = None,
 ) -> None:
-    print(f"Running tutorials in {'smoke test' if smoke_test else 'standard'} mode.")
+    print(f"Running tutorial(s) in {'smoke test' if smoke_test else 'standard'} mode.")
     if not smoke_test:
         print("This may take a long time...")
     tutorial_dir = Path(repo_dir).joinpath("tutorials")
     num_runs = 0
     num_errors = 0
     ignored_tutorials = IGNORE if smoke_test else IGNORE | IGNORE_SMOKE_TEST_ONLY
-    for tutorial in tutorial_dir.iterdir():
-        if not tutorial.is_file or tutorial.suffix != ".ipynb":
-            continue
+
+    tutorials = sorted(
+        t for t in tutorial_dir.iterdir() if t.is_file and t.suffix == ".ipynb"
+    )
+    if name is not None:
+        tutorials = [t for t in tutorials if t.name == name]
+        if len(tutorials) == 0:
+            raise RuntimeError(f"Specified tutorial {name} not found in directory.")
+    for tutorial in tutorials:
         if not include_ignored and tutorial.name in ignored_tutorials:
             print(f"Ignoring tutorial {tutorial.name}.")
             continue
@@ -141,9 +148,17 @@ def run_tutorials(
     action="store_true",
     help="Run all tutorials (incl. ignored).",
 )
+parser.add_argument(
+    "-n",
+    "--name",
+    help="Run a specific tutorial by name. The name should include the "
+    ".ipynb extension. If the tutorial is on the ignore list, you still need "
+    "to specify --include-ignored.",
+)
 args = parser.parse_args()
 run_tutorials(
     repo_dir=args.path,
     include_ignored=args.include_ignored,
     smoke_test=args.smoke,
+    name=args.name,
 )
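
For context, here is a hypothetical way to exercise the new `name` parameter directly from Python; the equivalent CLI call would be something like `python scripts/run_tutorials.py -p . -n turbo_1.ipynb` (the `-p/--path` flag is inferred from `args.path`, which is not shown in this diff):

```python
# Hypothetical usage sketch of the new `name` parameter; the import
# assumes scripts/ is on sys.path, which this diff does not establish.
from run_tutorials import run_tutorials

run_tutorials(
    repo_dir=".",           # repository root containing tutorials/
    include_ignored=False,  # set True if the tutorial is on the ignore list
    smoke_test=True,
    name="turbo_1.ipynb",   # must include the .ipynb extension
)
```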

tutorials/composite_bo_with_hogp.ipynb

Lines changed: 5 additions & 10 deletions
@@ -63,7 +63,6 @@
 "source": [
 "import torch\n",
 "import os\n",
-"import logging\n",
 "import math\n",
 "import matplotlib.pyplot as plt\n",
 "import time\n",
@@ -389,11 +388,9 @@
 "if SMOKE_TEST:\n",
 "    n_batches = 1\n",
 "    batch_size = 2\n",
-"    n_trials = 1\n",
 "else:\n",
 "    n_batches = 15\n",
-"    batch_size = 4\n",
-"    n_trials = 3"
+"    batch_size = 4"
 ],
 "execution_count": 8,
 "outputs": []
@@ -421,8 +418,6 @@
 "customOutput": null
 },
 "source": [
-"all_objective_vals = []\n",
-"\n",
 "with gpt_settings.cholesky_jitter(1e-4):\n",
 "    c_batched, objective, bounds, num_samples = prepare_data(device=device, dtype=dtype)\n",
 "\n",
@@ -440,7 +435,7 @@
 "    # get best observations, log status\n",
 "    best_f = {k: objective(v).max().detach() for k, v in train_Y.items()}\n",
 "\n",
-"    logging.info(\n",
+"    print(\n",
 "        f\"It {i+1:>2}/{n_batches}, best obs.: \"\n",
 "        \", \".join([f\"{k}: {v:.3f}\" for k, v in best_f.items()])\n",
 "    )\n",
@@ -465,7 +460,7 @@
 "    )\n",
 "\n",
 "    mll = ExactMarginalLogLikelihood(model_ei.likelihood, model_ei)\n",
-"    fit_gpytorch_torch(mll, options={\"lr\": 0.01, \"maxiter\": 3000, \"disp\": False})\n",
+"    fit_gpytorch_torch(mll, options={\"lr\": 0.01})\n",
 "\n",
 "    # generate qEI candidate (single output modeling)\n",
 "    qEI = qExpectedImprovement(model_ei, best_f=best_f[\"ei\"], sampler=sampler)\n",
@@ -480,7 +475,7 @@
 "    )\n",
 "\n",
 "    mll = ExactMarginalLogLikelihood(model_ei_hogp_cf.likelihood, model_ei_hogp_cf)\n",
-"    fit_gpytorch_torch(mll, options={\"lr\": 0.01, \"maxiter\": 3000, \"disp\": False})\n",
+"    fit_gpytorch_torch(mll, options={\"lr\": 0.01})\n",
 "\n",
 "    # generate qEI candidate (multi-output modeling)\n",
 "    qEI_hogp_cf = qExpectedImprovement(\n",
@@ -498,7 +493,7 @@
 "        train_X[k] = torch.cat([Xold, Xnew])\n",
 "        train_Y[k] = torch.cat([train_Y[k], c_batched(Xnew)])\n",
 "\n",
-"    logging.info(f\"Wall time: {time.monotonic() - tic:1f}\")\n",
+"    print(f\"Wall time: {time.monotonic() - tic:1f}\")\n",
 "\n",
 "    objective_dict = {k: objective(train_Y[k]) for k in train_Y}"
 ],
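
The rationale for dropping `"maxiter"` and `"disp"` is easy to verify: they are scipy-style optimizer options, and `torch.optim.Adam`'s constructor has no such parameters. A quick standard-library check (not the BoTorch code path):

```python
# Quick check (not the BoTorch implementation): Adam's constructor
# accepts "lr" but has no "maxiter" or "disp" parameters, which is
# why those scipy-style options were dead weight in the tutorial.
import inspect

import torch

params = inspect.signature(torch.optim.Adam.__init__).parameters
print("lr" in params)       # True  -> kept
print("maxiter" in params)  # False -> removed
print("disp" in params)     # False -> removed
```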

tutorials/turbo_1.ipynb

Lines changed: 5 additions & 2 deletions
@@ -401,6 +401,7 @@
 "RAW_SAMPLES = 512 if not SMOKE_TEST else 4\n",
 "N_CANDIDATES = min(5000, max(2000, 200 * dim)) if not SMOKE_TEST else 4\n",
 "\n",
+"torch.manual_seed(0)\n",
 "\n",
 "while not state.restart_triggered:  # Run until TuRBO converges\n",
 "    # Fit a GP model\n",
@@ -565,6 +566,8 @@
 }
 ],
 "source": [
+"torch.manual_seed(0)\n",
+"\n",
 "X_ei = get_initial_points(dim, n_init)\n",
 "Y_ei = torch.tensor(\n",
 "    [eval_objective(x) for x in X_ei], dtype=dtype, device=device\n",
@@ -700,7 +703,7 @@
 "bento/extensions/theme/main.css": true
 },
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
@@ -714,7 +717,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.3"
+"version": "3.10.8"
 }
 },
 "nbformat": 4,
