|
465 | 465 | "option.enable_reasoning": True, |
466 | 466 | "option.reasoning_parser": "deepseek_r1", |
467 | 467 | }, |
| 468 | + "qwen3-8b": { |
| 469 | + "option.model_id": "Qwen/Qwen3-8B", |
| 470 | + "option.tensor_parallel_degree": 1, |
| 471 | + }, |
| 472 | + "qwen3-8b-lmcache": { |
| 473 | + "option.model_id": "Qwen/Qwen3-8B", |
| 474 | + "option.tensor_parallel_degree": 1, |
| 475 | + "option.load_format": "dummy", |
| 476 | + "option.max_new_tokens": 100, |
| 477 | + "lmcache_config_file": "lmcache_qwen3_benchmark.yaml", |
| 478 | + "option.kv_transfer_config": |
| 479 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 480 | + "load_on_devices": 0, |
| 481 | + }, |
| 482 | + "qwen3-8b-baseline": { |
| 483 | + "option.model_id": "Qwen/Qwen3-8B", |
| 484 | + "option.tensor_parallel_degree": 1, |
| 485 | + "option.load_format": "dummy", |
| 486 | + "option.max_new_tokens": 100, |
| 487 | + "gpu.maxWorkers": 1, |
| 488 | + "load_on_devices": 0, |
| 489 | + }, |
| 490 | + "qwen3-8b-lmcache-ebs": { |
| 491 | + "option.model_id": "Qwen/Qwen3-8B", |
| 492 | + "option.tensor_parallel_degree": 1, |
| 493 | + "option.load_format": "dummy", |
| 494 | + "option.max_new_tokens": 100, |
| 495 | + "lmcache_config_file": "lmcache_qwen3_ebs.yaml", |
| 496 | + "option.kv_transfer_config": |
| 497 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 498 | + "load_on_devices": 0, |
| 499 | + }, |
| 500 | + "qwen3-8b-lmcache-nvme": { |
| 501 | + "option.model_id": "Qwen/Qwen3-8B", |
| 502 | + "option.tensor_parallel_degree": 1, |
| 503 | + "option.load_format": "dummy", |
| 504 | + "option.max_new_tokens": 100, |
| 505 | + "lmcache_config_file": "lmcache_qwen3_nvme.yaml", |
| 506 | + "option.kv_transfer_config": |
| 507 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 508 | + "load_on_devices": 0, |
| 509 | + }, |
| 510 | + "qwen3-8b-no-cache": { |
| 511 | + "option.model_id": "Qwen/Qwen3-8B", |
| 512 | + "option.tensor_parallel_degree": 1, |
| 513 | + "option.load_format": "dummy", |
| 514 | + "option.max_new_tokens": 100, |
| 515 | + "option.enable_prefix_caching": False, |
| 516 | + "load_on_devices": 0, |
| 517 | + }, |
| 518 | + "qwen3-8b-vllm-prefix-cache": { |
| 519 | + "option.model_id": "Qwen/Qwen3-8B", |
| 520 | + "option.tensor_parallel_degree": 1, |
| 521 | + "option.load_format": "dummy", |
| 522 | + "option.max_new_tokens": 100, |
| 523 | + "option.enable_prefix_caching": True, |
| 524 | + "load_on_devices": 0, |
| 525 | + }, |
| 526 | + "qwen2.5-1.5b": { |
| 527 | + "option.model_id": "Qwen/Qwen2.5-1.5B", |
| 528 | + "option.tensor_parallel_degree": 1, |
| 529 | + "option.load_format": "dummy", |
| 530 | + "option.max_new_tokens": 100, |
| 531 | + }, |
| 532 | + "qwen2.5-7b": { |
| 533 | + "option.model_id": "Qwen/Qwen2.5-7B", |
| 534 | + "option.tensor_parallel_degree": 1, |
| 535 | + "option.load_format": "dummy", |
| 536 | + "option.max_new_tokens": 100, |
| 537 | + }, |
| 538 | + "qwen2.5-72b": { |
| 539 | + "option.model_id": "Qwen/Qwen2.5-72B", |
| 540 | + "option.tensor_parallel_degree": 4, |
| 541 | + "option.load_format": "dummy", |
| 542 | + "option.max_new_tokens": 100, |
| 543 | + }, |
| 544 | + "qwen2.5-1.5b-lmcache": { |
| 545 | + "option.model_id": |
| 546 | + "Qwen/Qwen2.5-1.5B", |
| 547 | + "option.tensor_parallel_degree": |
| 548 | + 1, |
| 549 | + "option.load_format": |
| 550 | + "dummy", |
| 551 | + "option.max_new_tokens": |
| 552 | + 100, |
| 553 | + "lmcache_config_file": |
| 554 | + "lmcache_qwen25_1_5b.yaml", |
| 555 | + "option.kv_transfer_config": |
| 556 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 557 | + }, |
| 558 | + "qwen2.5-7b-lmcache": { |
| 559 | + "option.model_id": |
| 560 | + "Qwen/Qwen2.5-7B", |
| 561 | + "option.tensor_parallel_degree": |
| 562 | + 1, |
| 563 | + "option.load_format": |
| 564 | + "dummy", |
| 565 | + "option.max_new_tokens": |
| 566 | + 100, |
| 567 | + "lmcache_config_file": |
| 568 | + "lmcache_qwen25_7b.yaml", |
| 569 | + "option.kv_transfer_config": |
| 570 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 571 | + }, |
| 572 | + "qwen2.5-72b-lmcache": { |
| 573 | + "option.model_id": |
| 574 | + "Qwen/Qwen2.5-72B", |
| 575 | + "option.tensor_parallel_degree": |
| 576 | + 4, |
| 577 | + "option.load_format": |
| 578 | + "dummy", |
| 579 | + "option.max_new_tokens": |
| 580 | + 100, |
| 581 | + "lmcache_config_file": |
| 582 | + "lmcache_qwen25_72b.yaml", |
| 583 | + "option.kv_transfer_config": |
| 584 | + '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}', |
| 585 | + }, |
468 | 586 | "tinyllama-input-len-exceeded": { |
469 | 587 | "option.model_id": "s3://djl-llm/tinyllama-1.1b-chat/", |
470 | 588 | "option.max_model_len": "50", |
|
0 commit comments