@@ -122,9 +122,14 @@ def get_speculative_config():
122122 # Option 2: Build config from individual environment variables
123123 spec_method = os .getenv ('SPECULATIVE_METHOD' )
124124 spec_model = os .getenv ('SPECULATIVE_MODEL' )
125- num_spec_tokens = os .getenv ('NUM_SPECULATIVE_TOKENS' )
126- ngram_max = os .getenv ('NGRAM_PROMPT_LOOKUP_MAX' )
127- ngram_min = os .getenv ('NGRAM_PROMPT_LOOKUP_MIN' )
125+ _num_spec_tokens = os .getenv ('NUM_SPECULATIVE_TOKENS' )
126+ _ngram_max = os .getenv ('NGRAM_PROMPT_LOOKUP_MAX' )
127+ _ngram_min = os .getenv ('NGRAM_PROMPT_LOOKUP_MIN' )
128+
129+ # Convert numeric vars to int so '0' (hub.json default) is treated as unset
130+ num_spec_tokens = (int (_num_spec_tokens ) or None ) if _num_spec_tokens else None
131+ ngram_max = (int (_ngram_max ) or None ) if _ngram_max else None
132+ ngram_min = (int (_ngram_min ) or None ) if _ngram_min else None
128133
129134 if not any ([spec_method , spec_model , ngram_max ]):
130135 return None
@@ -150,11 +155,11 @@ def get_speculative_config():
150155 if spec_model :
151156 config ['model' ] = spec_model
152157 if num_spec_tokens :
153- config ['num_speculative_tokens' ] = int ( num_spec_tokens )
158+ config ['num_speculative_tokens' ] = num_spec_tokens
154159 if ngram_max :
155- config ['prompt_lookup_max' ] = int ( ngram_max )
160+ config ['prompt_lookup_max' ] = ngram_max
156161 if ngram_min :
157- config ['prompt_lookup_min' ] = int ( ngram_min )
162+ config ['prompt_lookup_min' ] = ngram_min
158163
159164 draft_tp = os .getenv ('SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE' )
160165 if draft_tp :
0 commit comments