Skip to content

Commit 228bb78

Browse files
Merge pull request #56 from freewym/work
Adds support for beam search with a configurable num_beams for Phi-4-Multimodal
2 parents 3953c59 + 86c9747 commit 228bb78

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

phi/run_eval.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def main(args):
5757

5858
prompt = f"{user}<|audio_1|>{args.user_prompt}{prompt_suffix}{assistant}"
5959

60-
gen_kwargs = {"max_new_tokens": args.max_new_tokens}
60+
gen_kwargs = {"max_new_tokens": args.max_new_tokens, "num_beams": args.num_beams}
6161

6262
stop_tokens = [prompt_suffix, processor.tokenizer.eos_token]
6363
stop_tokens_ids = processor.tokenizer(stop_tokens, add_special_tokens=False, padding="longest", return_tensors="pt")["input_ids"]
@@ -67,7 +67,9 @@ def benchmark(batch, min_new_tokens=None):
6767
# Load audio inputs
6868
audios = [(audio["array"], audio["sampling_rate"]) for audio in batch["audio"]]
6969
minibatch_size = len(audios)
70-
gen_kwargs["stopping_criteria"] = StoppingCriteriaList([MultipleTokenBatchStoppingCriteria(stop_tokens_ids, batch_size=minibatch_size)])
70+
gen_kwargs["stopping_criteria"] = StoppingCriteriaList(
71+
[MultipleTokenBatchStoppingCriteria(stop_tokens_ids, batch_size=args.num_beams * minibatch_size)]
72+
)
7173

7274
# START TIMING
7375
start_time = time.time()
@@ -212,6 +214,12 @@ def benchmark(batch, min_new_tokens=None):
212214
default=16,
213215
help="Number of samples to go through each streamed batch.",
214216
)
217+
parser.add_argument(
218+
"--num_beams",
219+
type=int,
220+
default=1,
221+
help="Number of beams for beam search.",
222+
)
215223
parser.add_argument(
216224
"--max_eval_samples",
217225
type=int,

phi/run_phi4_multimodal.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export PYTHONPATH="..":$PYTHONPATH
44

55
MODEL_IDs=("microsoft/Phi-4-multimodal-instruct")
66
BATCH_SIZE=32
7+
NUM_BEAMS=1
78
MAX_NEW_TOKENS=512
89

910
num_models=${#MODEL_IDs[@]}
@@ -20,6 +21,7 @@ do
2021
--split="test" \
2122
--device=0 \
2223
--batch_size=${BATCH_SIZE} \
24+
--num_beams=${NUM_BEAMS} \
2325
--max_eval_samples=-1 \
2426
--max_new_tokens=${MAX_NEW_TOKENS} \
2527
--user_prompt="${default_user_prompt}"
@@ -31,6 +33,7 @@ do
3133
--split="test" \
3234
--device=0 \
3335
--batch_size=${BATCH_SIZE} \
36+
--num_beams=${NUM_BEAMS} \
3437
--max_eval_samples=-1 \
3538
--max_new_tokens=${MAX_NEW_TOKENS} \
3639
--user_prompt="${default_user_prompt}"
@@ -42,6 +45,7 @@ do
4245
--split="test" \
4346
--device=0 \
4447
--batch_size=${BATCH_SIZE} \
48+
--num_beams=${NUM_BEAMS} \
4549
--max_eval_samples=-1 \
4650
--max_new_tokens=${MAX_NEW_TOKENS} \
4751
--user_prompt="Transcribe the audio clip to English text."
@@ -53,6 +57,7 @@ do
5357
--split="test" \
5458
--device=0 \
5559
--batch_size=${BATCH_SIZE} \
60+
--num_beams=${NUM_BEAMS} \
5661
--max_eval_samples=-1 \
5762
--max_new_tokens=${MAX_NEW_TOKENS} \
5863
--user_prompt="${default_user_prompt}"
@@ -64,6 +69,7 @@ do
6469
--split="test.clean" \
6570
--device=0 \
6671
--batch_size=${BATCH_SIZE} \
72+
--num_beams=${NUM_BEAMS} \
6773
--max_eval_samples=-1 \
6874
--max_new_tokens=${MAX_NEW_TOKENS} \
6975
--user_prompt="${default_user_prompt}"
@@ -75,6 +81,7 @@ do
7581
--split="test.other" \
7682
--device=0 \
7783
--batch_size=${BATCH_SIZE} \
84+
--num_beams=${NUM_BEAMS} \
7885
--max_eval_samples=-1 \
7986
--max_new_tokens=${MAX_NEW_TOKENS} \
8087
--user_prompt="${default_user_prompt}"
@@ -86,6 +93,7 @@ do
8693
--split="test" \
8794
--device=0 \
8895
--batch_size=${BATCH_SIZE} \
96+
--num_beams=${NUM_BEAMS} \
8997
--max_eval_samples=-1 \
9098
--max_new_tokens=${MAX_NEW_TOKENS} \
9199
--user_prompt="${default_user_prompt}"
@@ -97,6 +105,7 @@ do
97105
--split="test" \
98106
--device=0 \
99107
--batch_size=${BATCH_SIZE} \
108+
--num_beams=${NUM_BEAMS} \
100109
--max_eval_samples=-1 \
101110
--max_new_tokens=${MAX_NEW_TOKENS} \
102111
--user_prompt="${default_user_prompt}"

0 commit comments

Comments
 (0)