Fix Style control Bootstrapping (#3500)

CodingWithTim · CodingWithTim · web-flow · commit 3e21ddc164c1 · 2024-08-27T19:41:16.000-07:00
Co-authored-by: CodingWithTim <tim@inst-builder-debian-12-build-build-4zqb5.us-central1-a.c.gce-image-builder.internal>
diff --git a/fastchat/serve/monitor/elo_analysis.py b/fastchat/serve/monitor/elo_analysis.py
@@ -439,7 +439,7 @@ def construct_style_matrices(
     style_elements=STYLE_CONTROL_ELEMENTS_V1,
     add_one=True,
 ):
-    models = pd.concat([battles["model_a"], battles["model_b"]]).unique()
+    models = pd.concat([df["model_a"], df["model_b"]]).unique()
     models = pd.Series(np.arange(len(models)), index=models)
 
     # duplicate battles
@@ -498,12 +498,17 @@ def construct_style_matrices(
 def get_bootstrap_result_style_control(X, Y, models, func_compute_elo, num_round=1000):
     elos = []
     coefs = []
+    assert X.shape[0] % 2 == 0 and X.shape[0] == Y.shape[0]
+    k = int(
+        X.shape[0] / 2
+    )  # Since we duplicate the battles when constructing X and Y, we don't want to sample the duplicates
+
     for _ in tqdm(range(num_round), desc="bootstrap"):
-        indices = np.random.choice(
-            list(range(len(battles))), size=(len(battles)), replace=True
-        )
-        _X = X[indices]
-        _Y = Y[indices]
+        indices = np.random.choice(list(range(k)), size=(k), replace=True)
+        _X = np.concatenate([X[indices], X[indices]])
+        _Y = np.concatenate([Y[indices], Y[indices]])
+        assert _X.shape == X.shape and _Y.shape == Y.shape
+
         states = ~_X[:, : len(models)].any(axis=0)
 
         elo, coef = func_compute_elo(_X, _Y, models=models[~states])