@@ -54,21 +54,23 @@ def __init__(
54
54
time .time ()
55
55
56
56
self .status = RequestStatus .WAITING
57
- if sampling_params and sampling_params .guided_decoding is not None :
58
- self .status = RequestStatus .WAITING_FOR_FSM
57
+ self .use_structured_output = False
59
58
self .events : list [EngineCoreEvent ] = []
60
59
self .stop_reason : Union [int , str , None ] = None
61
60
62
61
# P/D: Connector-specific KV transfer parameters.
63
62
self .kv_transfer_params : Optional [dict [str , Any ]] = None
64
63
65
64
if pooling_params is not None :
65
+ # Pooling models.
66
66
self .max_tokens = 1
67
67
elif sampling_params is not None :
68
+ # Generative models.
68
69
assert sampling_params .max_tokens is not None
69
70
self .max_tokens = sampling_params .max_tokens
70
71
if sampling_params .guided_decoding is not None :
71
72
self .status = RequestStatus .WAITING_FOR_FSM
73
+ self .use_structured_output = True
72
74
73
75
if sampling_params .extra_args is not None :
74
76
self .kv_transfer_params = \
@@ -192,11 +194,6 @@ def get_num_encoder_tokens(self, input_id: int) -> int:
192
194
num_tokens = self .mm_positions [input_id ].length
193
195
return num_tokens
194
196
195
- @property
196
- def use_structured_output (self ) -> bool :
197
- return self .sampling_params is not None and \
198
- self .sampling_params .guided_decoding is not None
199
-
200
197
def record_event (
201
198
self ,
202
199
event_type : EngineCoreEventType ,
0 commit comments