Skip to content

Commit 6ba04b7

Browse files
authored
Fix multi-image format for expert model training (#71)
Update training data json files to use `"images": [path1, path2, ...]` format and use only the `<image>` placeholder in prompts. This follows the VILA training format, e.g.: ``` { "images": ["a.jpg", "b.jpg", "c.jpg"], "conversations": [ { "from": "human", "value": "<image> <image> <image> what object is shown in the images?\n" } ] } ```
1 parent 9d947b1 commit 6ba04b7

File tree

4 files changed

+17
-13
lines changed

4 files changed

+17
-13
lines changed

m3/data_prepare/experts/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ We can take existing CT datasets, run [VISTA3D](https://github.com/Project-MONAI
77
export PYTHONPATH=${PWD}/..
88
ROOT_DIR=../../data/experts/vista3d/inference_results
99
OUT_FILEPREFIX="../../data/experts/vista3d/llama_gen_expert_data_vista3d_what"
10-
python expert_train_data_cxr.py --in_datapath ${IN_DATAPATH} --root_dir ${ROOT_DIR} --out_fileprefix ${OUT_FILEPREFIX}
10+
python expert_train_data_vista3d.py --in_datapath ${IN_DATAPATH} --root_dir ${ROOT_DIR} --out_fileprefix ${OUT_FILEPREFIX}
1111
```
1212

1313
### 2. Prepare expert training data for BRATS
@@ -43,3 +43,11 @@ python expert_train_data_brats.py --in_meta_data ${META_DATA} --images_root ${RO
4343

4444
### 2. Prepare expert training data for TorchXRayVision
4545
For details on how to prepare training & evaluation data with a TorchXRayVision expert model ensemble, see [here](./torchxrayvision/README.md).
46+
47+
And run a command similar to this
48+
```commandline
49+
export PYTHONPATH=${PWD}/..
50+
ROOT_DIR=../../data/experts/cxr/inference_results
51+
OUT_FILEPREFIX="../../data/experts/cxr/cxr_expert"
52+
python expert_train_data_cxr.py --in_datapath ${IN_DATAPATH} --root_dir ${ROOT_DIR} --out_fileprefix ${OUT_FILEPREFIX}
53+
```

m3/data_prepare/experts/expert_train_data_brats.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import random
1515

1616
from data_utils import read_json, read_txt, write_json
17-
from expert_utils import add_brats_expert_conversation, assert_image_placeholder, get_predictions, model_list
17+
from expert_utils import add_brats_expert_conversation, model_list
1818
from tqdm import tqdm
1919

2020
random.seed(0)
@@ -37,11 +37,7 @@ def main(args):
3737
for meta in tqdm(in_data, desc="creating train data..."):
3838
# create a q & a conversation
3939
entry = {
40-
"image1": meta["image"][0],
41-
"image2": meta["image"][1],
42-
"image3": meta["image"][2],
43-
"image4": meta["image"][3],
44-
"segmentation": meta["label"],
40+
"images": [meta["image"][0], meta["image"][1], meta["image"][2], meta["image"][3], meta["label"]],
4541
}
4642

4743
# what question

m3/data_prepare/experts/expert_train_data_vista3d.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def main(args):
156156

157157
id = str(uuid.uuid4())
158158

159-
entry = {"image": image, "id": id}
159+
entry = {"images": [image], "id": id}
160160

161161
if "tumor" in group_name or "lesion" in group_name:
162162
# tumor task
@@ -173,7 +173,7 @@ def main(args):
173173
conv.append(
174174
{
175175
"from": "human",
176-
"value": f"The results are <segmentation>. The colors in this image describe {m['label_colors']}. "
176+
"value": f"The results are <image>. The colors in this image describe {m['label_colors']}. "
177177
f"Use this result to respond to this prompt:\n{question}.",
178178
}
179179
)
@@ -206,7 +206,7 @@ def main(args):
206206
answer = "no"
207207
conv.append({"from": "gpt", "value": answer})
208208

209-
entry["segmentation"] = label
209+
entry["images"].append(label)
210210
else: # segmentation or what is task
211211
segment_task = True if random.random() > 0.5 else False
212212
if segment_task:
@@ -230,7 +230,7 @@ def main(args):
230230
conv.append(
231231
{
232232
"from": "human",
233-
"value": f"The results are <segmentation>. "
233+
"value": f"The results are <image>. "
234234
f"The colors in this image describe {m['label_colors']}. "
235235
f"Use this result to respond to this prompt:\n{question}.",
236236
}

m3/data_prepare/experts/expert_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,15 @@ def add_brats_expert_conversation(conv, trigger="This looks like an MRI image se
8787
{
8888
"from": "human",
8989
"value": model_list
90-
+ f"T1(contrast enhanced): <image1>, T1: <image2>, T2: <image3>, FLAIR: <image4> These are different MRI modalities.\n"
90+
+ f"T1(contrast enhanced): <image>, T1: <image>, T2: <image>, FLAIR: <image> These are different MRI modalities.\n"
9191
+ first_prompt,
9292
}
9393
)
9494
new_conv.append({"from": "gpt", "value": trigger})
9595
new_conv.append(
9696
{
9797
"from": "human",
98-
"value": f"The results are <segmentation>. The colors in this image describe\nyellow and red: tumor core, only yellow: enhancing tumor, all colors: whole tumor\nUse this result to respond to this prompt:\n{first_prompt}.",
98+
"value": f"The results are <image>. The colors in this image describe\nyellow and red: tumor core, only yellow: enhancing tumor, all colors: whole tumor\nUse this result to respond to this prompt:\n{first_prompt}.",
9999
}
100100
)
101101
new_conv.extend(conv[1::])

0 commit comments

Comments
 (0)