Merge branch 'main' into update-lb

infwinston · infwinston · commit d69af13b930c · 2024-08-28T02:46:38.000Z
diff --git a/fastchat/serve/monitor/elo_analysis.py b/fastchat/serve/monitor/elo_analysis.py
@@ -597,7 +597,7 @@ def construct_style_matrices(
     style_elements=STYLE_CONTROL_ELEMENTS_V1,
     add_one=True,
 ):
-    models = pd.concat([battles["model_a"], battles["model_b"]]).unique()
+    models = pd.concat([df["model_a"], df["model_b"]]).unique()
     models = pd.Series(np.arange(len(models)), index=models)
 
     # duplicate battles
@@ -656,12 +656,17 @@ def construct_style_matrices(
 def get_bootstrap_result_style_control(X, Y, models, func_compute_elo, num_round=1000):
     elos = []
     coefs = []
+    assert X.shape[0] % 2 == 0 and X.shape[0] == Y.shape[0]
+    k = int(
+        X.shape[0] / 2
+    )  # Since we duplicate the battles when constructing X and Y, we don't want to sample the duplicates
+
     for _ in tqdm(range(num_round), desc="bootstrap"):
-        indices = np.random.choice(
-            list(range(len(battles))), size=(len(battles)), replace=True
-        )
-        _X = X[indices]
-        _Y = Y[indices]
+        indices = np.random.choice(list(range(k)), size=(k), replace=True)
+        _X = np.concatenate([X[indices], X[indices]])
+        _Y = np.concatenate([Y[indices], Y[indices]])
+        assert _X.shape == X.shape and _Y.shape == Y.shape
+
         states = ~_X[:, : len(models)].any(axis=0)
 
         elo, coef = func_compute_elo(_X, _Y, models=models[~states])
diff --git a/fastchat/serve/vision/create_vqa_examples_dir.py b/fastchat/serve/vision/create_vqa_examples_dir.py
@@ -104,6 +104,14 @@ def download_images_and_create_json(
             "subset": False,
             "split": "test",
         },
+        "NewYorker": {
+            "path": "jmhessel/newyorker_caption_contest",
+            "image_key": "image",
+            "question_key": "questions",
+            "id_key": "index",
+            "subset": "explanation",
+            "split": "train",
+        },
     }
 
     download_images_and_create_json(
@@ -114,7 +122,7 @@ def download_images_and_create_json(
         with open(f"{args.output_dir}/{dataset_name}/data.json") as f:
             data = json.load(f)
             print(f"Dataset: {dataset_name}, Number of examples: {len(data)}")
-            dataset_json.extend(np.random.choice(data, 500))
+            dataset_json.extend(data)
 
     with open(f"{args.output_dir}/metadata_sampled.json", "w") as f:
         json.dump(dataset_json, f, indent=4)
diff --git a/fastchat/serve/vision/create_vqa_examples_json.py b/fastchat/serve/vision/create_vqa_examples_json.py
@@ -17,11 +17,12 @@
     args = parser.parse_args()
 
     dataset_prop = {
-        "DocVQA": 500,
-        "ChartQA": 500,
-        "NewYorker": 1000,
-        "WikiArt": 500,
-        "TextVQA": 500,
+        "realworldqa": 500,
+        "Memes": 500,
+        "Floorplan": 500,
+        "Website": 500,
+        "IllusionVQA": 500,
+        "NewYorker": 500,
     }
 
     dataset_json = []