examples/jiuge.py — 8 additions, 1 deletion
@@ -46,6 +46,11 @@ def get_args():
         action="store_true",
         help="Run cambricon test",
     )
+    parser.add_argument(
+        "--hygon",
+        action="store_true",
+        help="Run hygon test",
+    )
     parser.add_argument(
         "--model_path",
         type=str,
@@ -199,9 +204,11 @@ def test(
         device_str = "cuda"
     elif args.cambricon:
         device_str = "mlu"
+    elif args.hygon:
+        device_str = "cuda"
     else:
         print(
-            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar] --model_path=<path/to/model_dir>\n"
+            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar | --cambricon | --hygon] --model_path=<path/to/model_dir>\n"
            "such as, python examples/jiuge.py --nvidia --model_path=~/TinyLlama-1.1B-Chat-v1.0"
         )
         sys.exit(1)
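Worth noting in this hunk: --hygon resolves to the same "cuda" device string as --nvidia, presumably because Hygon's toolchain exposes a CUDA-compatible runtime. Below is a table-driven sketch of the same flag-to-device resolution; only the pairs visible in this diff are included, and resolve_device() itself is illustrative, not part of the PR.

# Sketch only: a dict-driven equivalent of the if/elif chain above.
# Flag-to-device pairs are limited to those visible in this diff.
import argparse
import sys

FLAG_TO_DEVICE = {
    "nvidia": "cuda",
    "cambricon": "mlu",
    "hygon": "cuda",  # Hygon reuses the CUDA-compatible device string
}

def resolve_device(args: argparse.Namespace) -> str:
    # Return the device string for the first backend flag that is set.
    for flag, device in FLAG_TO_DEVICE.items():
        if getattr(args, flag, False):
            return device
    print("Usage: python examples/jiuge.py [--nvidia | --cambricon | --hygon | ...] --model_path=<path/to/model_dir>")
    sys.exit(1)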
test/bench/test_benchmark.py — 15 additions, 6 deletions
@@ -62,16 +62,18 @@ def __init__(
         self.benchmark = benchmark
 
         # Map device type string to infinicore device
+        # Note: These map to the Python device type strings used by infinicore.device()
+        # which correspond to _TORCH_DEVICE_MAP values in InfiniCore/python/infinicore/device.py
         device_map = {
             "cpu": "cpu",
             "nvidia": "cuda",
             "cambricon": "mlu",
-            "ascend": "ascend",
-            "metax": "metax",
-            "moore": "moore",
-            "iluvatar": "iluvatar",
-            "kunlun": "kunlun",
-            "hygon": "hygon",
+            "ascend": "npu",
+            "metax": "cuda",
+            "moore": "musa",
+            "iluvatar": "cuda",
+            "kunlun": "cuda",
+            "hygon": "cuda",
         }
 
         device_name = device_map.get(device_type_str.lower(), "cpu")
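The lookup on the last context line normalizes the incoming backend name and silently falls back to "cpu" for anything unrecognized. A self-contained sketch of that behavior, copying the post-change values from this hunk (the helper name is illustrative):

# Sketch: case-insensitive backend lookup with a silent CPU fallback,
# mirroring device_map.get(device_type_str.lower(), "cpu") above.
DEVICE_MAP = {
    "cpu": "cpu",
    "nvidia": "cuda",
    "cambricon": "mlu",
    "ascend": "npu",
    "metax": "cuda",
    "moore": "musa",
    "iluvatar": "cuda",
    "kunlun": "cuda",
    "hygon": "cuda",
}

def to_infinicore_device(device_type_str: str) -> str:
    # Unknown backends degrade to "cpu" rather than raising, so a typo in
    # the benchmark config runs on CPU instead of failing fast.
    return DEVICE_MAP.get(device_type_str.lower(), "cpu")

assert to_infinicore_device("Hygon") == "cuda"
assert to_infinicore_device("unknown-backend") == "cpu"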
@@ -180,6 +182,13 @@ def _generate_step(self, tokens, max_steps, topp_, topk_, temperature_):
 
         start_time = time.perf_counter()
 
+        # For cpp backend, reset cache before generation if use_cache is enabled
+        if self.model.use_cache and hasattr(self.model, "_model") and hasattr(self.model._model, "reset_cache"):
+            batch_size = input_ids.shape[0]
+            seq_len = input_ids.shape[1]
+            max_cache_len = max_steps + seq_len
+            self.model.reset_cache(batch_size=batch_size, initial_capacity=max_cache_len)
+
         # Use model's built-in generate() method which properly handles KV cache
         # Pass sampling parameters (temperature, topk, topp) via kwargs
         output_ids = self.model.generate(
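The sizing logic here is the substantive part: the cache must hold the prompt (seq_len tokens) plus everything that could be generated afterwards (up to max_steps tokens), hence max_cache_len = max_steps + seq_len. A minimal sketch of that capacity computation in isolation; the helper is hypothetical, and reset_cache's actual signature lives in the cpp binding:

def kv_cache_capacity(input_ids, max_steps: int) -> tuple[int, int]:
    # Size the cache for the worst case so it never overflows mid-generation:
    # the full prompt plus every token that might be generated.
    batch_size, seq_len = input_ids.shape
    return batch_size, seq_len + max_steps

# Usage, mirroring the guarded call in the diff (illustrative):
# batch_size, max_cache_len = kv_cache_capacity(input_ids, max_steps)
# model.reset_cache(batch_size=batch_size, initial_capacity=max_cache_len)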