examples/jiuge.py — 8 additions, 1 deletion
@@ -46,6 +46,11 @@ def get_args():
         action="store_true",
         help="Run cambricon test",
     )
+    parser.add_argument(
+        "--hygon",
+        action="store_true",
+        help="Run hygon test",
+    )
     parser.add_argument(
         "--model_path",
         type=str,
@@ -199,9 +204,11 @@ def test(
         device_str = "cuda"
     elif args.cambricon:
         device_str = "mlu"
+    elif args.hygon:
+        device_str = "cuda"
     else:
         print(
-            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar] --model_path=<path/to/model_dir>\n"
+            "Usage: python examples/jiuge.py [--cpu | --nvidia | --metax | --moore | --iluvatar | --cambricon | --hygon] --model_path=<path/to/model_dir>\n"
            "such as, python examples/jiuge.py --nvidia --model_path=~/TinyLlama-1.1B-Chat-v1.0"
         )
         sys.exit(1)
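Worth noting in this hunk: --hygon resolves to the same "cuda" device string as --nvidia, presumably because Hygon's toolchain exposes a CUDA-compatible runtime. Below is a table-driven sketch of the same flag-to-device resolution; only the pairs visible in this diff are included, and resolve_device() itself is illustrative, not part of the PR.

# Sketch only: a dict-driven equivalent of the if/elif chain above.
# Flag-to-device pairs are limited to those visible in this diff.
import argparse
import sys

FLAG_TO_DEVICE = {
    "nvidia": "cuda",
    "cambricon": "mlu",
    "hygon": "cuda",  # Hygon reuses the CUDA-compatible device string
}

def resolve_device(args: argparse.Namespace) -> str:
    # Return the device string for the first backend flag that is set.
    for flag, device in FLAG_TO_DEVICE.items():
        if getattr(args, flag, False):
            return device
    print("Usage: python examples/jiuge.py [--nvidia | --cambricon | --hygon | ...] --model_path=<path/to/model_dir>")
    sys.exit(1)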
test/bench/test_benchmark.py — 15 additions, 6 deletions
@@ -62,16 +62,18 @@ def __init__(
         self.benchmark = benchmark
 
         # Map device type string to infinicore device
+        # Note: These map to the Python device type strings used by infinicore.device()
+        # which correspond to _TORCH_DEVICE_MAP values in InfiniCore/python/infinicore/device.py
         device_map = {
             "cpu": "cpu",
             "nvidia": "cuda",
             "cambricon": "mlu",
-            "ascend": "ascend",
-            "metax": "metax",
-            "moore": "moore",
-            "iluvatar": "iluvatar",
-            "kunlun": "kunlun",
-            "hygon": "hygon",
+            "ascend": "npu",
+            "metax": "cuda",
+            "moore": "musa",
+            "iluvatar": "cuda",
+            "kunlun": "cuda",
+            "hygon": "cuda",
         }
 
         device_name = device_map.get(device_type_str.lower(), "cpu")
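The lookup on the last context line normalizes the incoming backend name and silently falls back to "cpu" for anything unrecognized. A self-contained sketch of that behavior, copying the post-change values from this hunk (the helper name is illustrative):

# Sketch: case-insensitive backend lookup with a silent CPU fallback,
# mirroring device_map.get(device_type_str.lower(), "cpu") above.
DEVICE_MAP = {
    "cpu": "cpu",
    "nvidia": "cuda",
    "cambricon": "mlu",
    "ascend": "npu",
    "metax": "cuda",
    "moore": "musa",
    "iluvatar": "cuda",
    "kunlun": "cuda",
    "hygon": "cuda",
}

def to_infinicore_device(device_type_str: str) -> str:
    # Unknown backends degrade to "cpu" rather than raising, so a typo in
    # the benchmark config runs on CPU instead of failing fast.
    return DEVICE_MAP.get(device_type_str.lower(), "cpu")

assert to_infinicore_device("Hygon") == "cuda"
assert to_infinicore_device("unknown-backend") == "cpu"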
@@ -180,6 +182,13 @@ def _generate_step(self, tokens, max_steps, topp_, topk_, temperature_):
 
         start_time = time.perf_counter()
 
+        # For cpp backend, reset cache before generation if use_cache is enabled
+        if self.model.use_cache and hasattr(self.model, "_model") and hasattr(self.model._model, "reset_cache"):
+            batch_size = input_ids.shape[0]
+            seq_len = input_ids.shape[1]
+            max_cache_len = max_steps + seq_len
+            self.model.reset_cache(batch_size=batch_size, initial_capacity=max_cache_len)
+
         # Use model's built-in generate() method which properly handles KV cache
         # Pass sampling parameters (temperature, topk, topp) via kwargs
         output_ids = self.model.generate(
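The sizing logic here is the substantive part: the cache must hold the prompt (seq_len tokens) plus everything that could be generated afterwards (up to max_steps tokens), hence max_cache_len = max_steps + seq_len. A minimal sketch of that capacity computation in isolation; the helper is hypothetical, and reset_cache's actual signature lives in the cpp binding:

def kv_cache_capacity(input_ids, max_steps: int) -> tuple[int, int]:
    # Size the cache for the worst case so it never overflows mid-generation:
    # the full prompt plus every token that might be generated.
    batch_size, seq_len = input_ids.shape
    return batch_size, seq_len + max_steps

# Usage, mirroring the guarded call in the diff (illustrative):
# batch_size, max_cache_len = kv_cache_capacity(input_ids, max_steps)
# model.reset_cache(batch_size=batch_size, initial_capacity=max_cache_len)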