@@ -6,11 +6,12 @@
 
 # isort: off
 from .lora_test_utils import check_llama_7b_multi_unique_lora_adapters_from_request
-from .test_llm import (
-    get_model_path, global_kvcache_config, llama_model_path,
-    llm_get_stats_async_test_harness, llm_get_stats_test_harness, prompts,
-    run_llm_abort_request, run_llm_with_postprocess_parallel_and_result_handler,
-    tinyllama_logits_processor_test_harness, _test_llm_capture_request_error)
+from .test_llm import (get_model_path, global_kvcache_config, llama_model_path,
+                       llm_get_stats_async_test_harness,
+                       llm_get_stats_test_harness, prompts,
+                       run_llm_abort_request,
+                       run_llm_with_postprocess_parallel_and_result_handler,
+                       tinyllama_logits_processor_test_harness)
 from utils.util import (EnvVarsContextManager, force_ampere,
                         run_function_in_sub_process, similar,
                         skip_gpu_memory_less_than_40gb,
@@ -69,10 +70,6 @@ def test_llm_get_stats_async(return_context_logits, use_overlap, |
                                      enable_iter_req_stats=enable_iter_req_stats)
 
 
-def test_llm_capture_request_error():
-    _test_llm_capture_request_error(pytorch_backend=True, tp_size=1)
-
-
 @force_ampere
 @pytest.mark.parametrize(
     "sampling_params",
|