Commit 372f5c5

fix jamba accuracy test failure (#3466)
1 parent 77a98ab commit 372f5c5

File tree: 2 files changed (+4, -0 lines)

examples/cpu/llm/inference/distributed/run_accuracy_with_deepspeed.py

Lines changed: 2 additions & 0 deletions
@@ -730,6 +730,8 @@ def _model_call(
             example_dict["output_router_logits"] = torch.tensor(
                 model_inputs["output_router_logits"]
             )
+            if self.config.architectures[0] == "JambaForCausalLM":
+                example_dict["num_logits_to_keep"] = torch.tensor(0)
 
         with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
             enabled=True if args.quant_with_amp or self._dtype == "bfloat16" else False,

examples/cpu/llm/inference/single_instance/run_accuracy.py

Lines changed: 2 additions & 0 deletions
@@ -431,6 +431,8 @@ def _model_call(
             example_dict["output_router_logits"] = torch.tensor(
                 model_inputs["output_router_logits"]
             )
+            if self.config.architectures[0] == "JambaForCausalLM":
+                example_dict["num_logits_to_keep"] = torch.tensor(0)
 
         with torch.inference_mode(), torch.no_grad(), torch.cpu.amp.autocast(
             enabled=True if args.quant_with_amp or self._dtype == "bfloat16" else False,
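
The two added lines are identical in both scripts: for Jamba, the accuracy harness now passes num_logits_to_keep=0 alongside the other example inputs. As a minimal sketch (an assumption about how HF-style causal-LM heads typically handle this argument, not a quote of the Jamba source), num_logits_to_keep usually drives a negative slice over the hidden states before the lm_head projection, so 0 keeps logits for every position, which per-token loglikelihood accuracy tests require, whereas a generation-time default of 1 would return logits only for the last token and break the evaluation. The helper lm_head_logits below is hypothetical.

import torch

def lm_head_logits(hidden_states: torch.Tensor,
                   lm_head: torch.nn.Linear,
                   num_logits_to_keep: int = 0) -> torch.Tensor:
    # hidden_states: [batch, seq_len, hidden_size]
    # Since -0 == 0 in Python, a slice starting at -0 keeps the whole
    # sequence: num_logits_to_keep=0 means "logits for all positions",
    # while k > 0 projects only the last k positions.
    return lm_head(hidden_states[:, -num_logits_to_keep:, :])

hidden = torch.randn(2, 16, 64)   # toy shapes, not Jamba's real sizes
head = torch.nn.Linear(64, 32000)
assert lm_head_logits(hidden, head, 0).shape == (2, 16, 32000)
assert lm_head_logits(hidden, head, 1).shape == (2, 1, 32000)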
