Skip to content

Commit 3e21ddc

Browse files
CodingWithTim and CodingWithTim authored
Fix Style control Bootstrapping (#3500)
Co-authored-by: CodingWithTim <tim@inst-builder-debian-12-build-build-4zqb5.us-central1-a.c.gce-image-builder.internal>
1 parent 4e62d77 commit 3e21ddc

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

fastchat/serve/monitor/elo_analysis.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -439,7 +439,7 @@ def construct_style_matrices(
439439
style_elements=STYLE_CONTROL_ELEMENTS_V1,
440440
add_one=True,
441441
):
442-
models = pd.concat([battles["model_a"], battles["model_b"]]).unique()
442+
models = pd.concat([df["model_a"], df["model_b"]]).unique()
443443
models = pd.Series(np.arange(len(models)), index=models)
444444

445445
# duplicate battles
@@ -498,12 +498,17 @@ def construct_style_matrices(
498498
def get_bootstrap_result_style_control(X, Y, models, func_compute_elo, num_round=1000):
499499
elos = []
500500
coefs = []
501+
assert X.shape[0] % 2 == 0 and X.shape[0] == Y.shape[0]
502+
k = int(
503+
X.shape[0] / 2
504+
) # Since we duplicate the battles when constructing X and Y, we don't want to sample the duplicates
505+
501506
for _ in tqdm(range(num_round), desc="bootstrap"):
502-
indices = np.random.choice(
503-
list(range(len(battles))), size=(len(battles)), replace=True
504-
)
505-
_X = X[indices]
506-
_Y = Y[indices]
507+
indices = np.random.choice(list(range(k)), size=(k), replace=True)
508+
_X = np.concatenate([X[indices], X[indices]])
509+
_Y = np.concatenate([Y[indices], Y[indices]])
510+
assert _X.shape == X.shape and _Y.shape == Y.shape
511+
507512
states = ~_X[:, : len(models)].any(axis=0)
508513

509514
elo, coef = func_compute_elo(_X, _Y, models=models[~states])

0 commit comments

Comments
 (0)