Skip to content

Commit 13fe160

Browse files
Update how Hugging Face's config files are processed (microsoft#1693)
### Description This PR updates how Hugging Face's config files are processed while creating the `genai_config.json` file for ONNX Runtime GenAI. ### Motivation and Context The `config.json` file typically stores model attributes while the `generation_config.json` file typically stores search attributes. Some fields can overlap between the two files, and the latter file typically contains more detailed information than the former file for a particular attribute (e.g. the [`config.json`](https://huggingface.co/google/gemma-3-270m-it/blob/main/config.json#L10) file for `google/gemma-3-270m-it` says that `eos_token_id = 1` but the [`generation_config.json`](https://huggingface.co/google/gemma-3-270m-it/blob/main/generation_config.json#L4) file for the same model says that `eos_token_id = [1, 106]`). To ensure the right attributes are stored, we compare the values stored in `generation_config.json` with the traditional default values loaded in `config.json`. If they do not match, then the values in `config.json` are rewritten with the `generation_config.json` values. With this PR, models such as [`google/gemma-3-270m-it`](https://huggingface.co/google/gemma-3-270m-it) and [`janhq/Jan-v1-4B`](https://huggingface.co/janhq/Jan-v1-4B) can be set with their default search values.
1 parent bee5dca commit 13fe160

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

src/python/py/models/builder.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -374,10 +374,26 @@ def make_attention_init(self):
374374
self.past_present_share_buffer = self.attention_attrs["op_type"] == "GroupQueryAttention"
375375

376376
def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):
377+
# Create config with attributes from config.json and generation_config.json (if latter file exists)
378+
config = AutoConfig.from_pretrained(model_name_or_path, token=self.hf_token, trust_remote_code=True, **extra_kwargs)
377379
try:
378-
config = GenerationConfig.from_pretrained(model_name_or_path, token=self.hf_token, trust_remote_code=True, **extra_kwargs)
380+
# Override search attributes in config based on values in generation_config.json
381+
gen_config = GenerationConfig.from_pretrained(model_name_or_path, token=self.hf_token, trust_remote_code=True, **extra_kwargs)
382+
defaults = {
383+
"bos_token_id": None,
384+
"do_sample": False,
385+
"eos_token_id": None,
386+
"pad_token_id": None,
387+
"temperature": 1.0,
388+
"top_k": 50,
389+
"top_p": 1.0,
390+
}
391+
for key, default_val in defaults.items():
392+
val = getattr(gen_config, key)
393+
if val != default_val:
394+
setattr(config, key, getattr(gen_config, key))
379395
except:
380-
config = AutoConfig.from_pretrained(model_name_or_path, token=self.hf_token, trust_remote_code=True, **extra_kwargs)
396+
pass
381397

382398
inputs = dict(zip(self.input_names, self.input_names))
383399
inputs.update({
@@ -393,9 +409,12 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):
393409
# Remove 'hidden_states' from 'outputs' entry in config since ORT GenAI doesn't use it
394410
del outputs["hidden_states"]
395411

412+
bos_token_id = config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id is not None else 1
413+
eos_token_id = config.eos_token_id
414+
pad_token_id = config.pad_token_id if hasattr(config, "pad_token_id") and config.pad_token_id is not None else config.eos_token_id[0] if isinstance(config.eos_token_id, list) else config.eos_token_id
396415
genai_config = {
397416
"model": {
398-
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id is not None else 1, # config.bos_token_id not present in ChatGLM model configs.
417+
"bos_token_id": bos_token_id,
399418
"context_length": self.context_length,
400419
"decoder": {
401420
"session_options" : {
@@ -411,8 +430,8 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):
411430
"num_hidden_layers": self.num_layers,
412431
"num_key_value_heads": self.num_kv_heads,
413432
},
414-
"eos_token_id": config.eos_token_id,
415-
"pad_token_id": config.pad_token_id if hasattr(config, "pad_token_id") and config.pad_token_id is not None else config.eos_token_id[0] if isinstance(config.eos_token_id, list) else config.eos_token_id,
433+
"eos_token_id": eos_token_id,
434+
"pad_token_id": pad_token_id,
416435
"type": self.model_type[ : self.model_type.find("For") if "For" in self.model_type else len(self.model_type)].lower(),
417436
"vocab_size": self.vocab_size,
418437
},
@@ -4268,6 +4287,7 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
42684287
# List architecture options in alphabetical order
42694288
if config.architectures[0] == "ChatGLMForConditionalGeneration" or config.architectures[0] == "ChatGLMModel":
42704289
# The quantized ChatGLM model has ChatGLMForConditionalGeneration as its architecture, whereas the HF model has the latter (ChatGLMModel)
4290+
config.bos_token_id = 1
42714291
config.hidden_act = "swiglu"
42724292
onnx_model = ChatGLMModel(config, io_dtype, onnx_dtype, execution_provider, cache_dir, extra_options)
42734293
elif config.architectures[0] == "Ernie4_5_ForCausalLM":

0 commit comments

Comments
 (0)