@@ -439,39 +439,3 @@ def test_connector_multi_request(enforce_single_worker, model_with_connector):
439439
440440 # The KV cache of both prior requests should be freed, allowing the third request to run.
441441 model .generate ([2 ] * 110 , sampling_params = sampling_params )
442-
443-
@pytest.mark.threadleak(enabled=False)
@pytest.mark.parametrize("matched_tokens", [0, 32])
def test_connector_num_matched_tokens(enforce_single_worker, model_with_connector,
                                      matched_tokens):
    """Check that a request exposes the connector's matched-token count.

    The scheduler mock is configured to report ``matched_tokens`` from
    ``get_num_new_matched_tokens``. After one ``generate`` call, the request
    handed to ``update_state_after_alloc`` must report the same number through
    its ``num_connector_matched_tokens`` attribute.
    """
    prompt_len = 64
    max_new_tokens = 8

    build_model, scheduler, worker = model_with_connector
    model = build_model(disable_overlap_scheduler=True)

    # Mock responses: nothing finished on the worker side, and the scheduler
    # reports `matched_tokens` together with a False second element.
    worker.get_finished.return_value = [], []
    scheduler.get_num_new_matched_tokens.return_value = matched_tokens, False

    params = SamplingParams(max_tokens=max_new_tokens, ignore_eos=True)
    model.generate([1] * prompt_len, params)

    # Request object recorded by the most recent get_num_new_matched_tokens call.
    # NOTE(review): this attribute is read after generate() has returned; if
    # both mock calls received the same request object, this value may already
    # reflect the update -- confirm.
    lookup_request = scheduler.get_num_new_matched_tokens.call_args.args[0]
    initial_value = lookup_request.num_connector_matched_tokens

    # Request object recorded by the most recent update_state_after_alloc call.
    alloc_request = scheduler.update_state_after_alloc.call_args.args[0]
    final_value = alloc_request.num_connector_matched_tokens

    # Getter: the attribute must be present on the request.
    assert hasattr(alloc_request, 'num_connector_matched_tokens')

    # Setter: the stored count must equal what the scheduler mock reported.
    assert final_value == matched_tokens, \
        f"Expected { matched_tokens } , got { final_value } "

    # With a non-zero match count, the value should differ from the first read.
    if matched_tokens:
        assert final_value != initial_value
0 commit comments