Merge pull request #56 from freewym/work

Deep-unlearning · web-flow · commit 228bb7816657 · 2025-03-13T16:33:36.000+01:00
adds support for beam search with configurable num_beams for Phi-4-Multimodal
diff --git a/phi/run_eval.py b/phi/run_eval.py
@@ -57,7 +57,7 @@ def main(args):
 
     prompt = f"{user}<|audio_1|>{args.user_prompt}{prompt_suffix}{assistant}"
 
-    gen_kwargs = {"max_new_tokens": args.max_new_tokens}
+    gen_kwargs = {"max_new_tokens": args.max_new_tokens, "num_beams": args.num_beams}
 
     stop_tokens = [prompt_suffix, processor.tokenizer.eos_token]
     stop_tokens_ids = processor.tokenizer(stop_tokens, add_special_tokens=False, padding="longest", return_tensors="pt")["input_ids"]
@@ -67,7 +67,9 @@ def benchmark(batch, min_new_tokens=None):
         # Load audio inputs
         audios = [(audio["array"], audio["sampling_rate"]) for audio in batch["audio"]]
         minibatch_size = len(audios)
-        gen_kwargs["stopping_criteria"] = StoppingCriteriaList([MultipleTokenBatchStoppingCriteria(stop_tokens_ids, batch_size=minibatch_size)])
+        gen_kwargs["stopping_criteria"] = StoppingCriteriaList(
+            [MultipleTokenBatchStoppingCriteria(stop_tokens_ids, batch_size=args.num_beams * minibatch_size)]
+        )
 
         # START TIMING
         start_time = time.time()
@@ -212,6 +214,12 @@ def benchmark(batch, min_new_tokens=None):
         default=16,
         help="Number of samples to go through each streamed batch.",
     )
+    parser.add_argument(
+        "--num_beams",
+        type=int,
+        default=1,
+        help="Number of beams for beam search.",
+    )
     parser.add_argument(
         "--max_eval_samples",
         type=int,
diff --git a/phi/run_phi4_multimodal.sh b/phi/run_phi4_multimodal.sh
@@ -4,6 +4,7 @@ export PYTHONPATH="..":$PYTHONPATH
 
 MODEL_IDs=("microsoft/Phi-4-multimodal-instruct")
 BATCH_SIZE=32
+NUM_BEAMS=1
 MAX_NEW_TOKENS=512
 
 num_models=${#MODEL_IDs[@]}
@@ -20,6 +21,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -31,6 +33,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -42,6 +45,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="Transcribe the audio clip to English text."
@@ -53,6 +57,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -64,6 +69,7 @@ do
         --split="test.clean" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -75,6 +81,7 @@ do
         --split="test.other" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -86,6 +93,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"
@@ -97,6 +105,7 @@ do
         --split="test" \
         --device=0 \
         --batch_size=${BATCH_SIZE} \
+        --num_beams=${NUM_BEAMS} \
         --max_eval_samples=-1 \
         --max_new_tokens=${MAX_NEW_TOKENS} \
         --user_prompt="${default_user_prompt}"