63 | 63 | from executorch.examples.models.llama.source_transformation.quantize import (
64 | 64 |     get_quant_embedding_transform,
65 | 65 | )
66 | | -from executorch.examples.qualcomm.oss_scripts.llama.hf_converter.convert_config import (
67 | | -    convert_configs,
68 | | -)
69 | 66 | from executorch.examples.qualcomm.oss_scripts.llama.model.static_llama import (
70 | 67 |     LlamaModel,
71 | 68 |     ModelArgs,
@@ -514,20 +511,25 @@ def compile(args, pte_filename, tokenizer): |
514 | 511 |     start_ts = time.time()
515 | 512 |

516 | 513 |     kv_config, prefill_config = None, None
| 514 | +    params_path = ""
517 | 515 |     if args.params:
518 | | -        with open(args.params) as f:
519 | | -            kv_config = ModelArgs(**json.load(f))
| 516 | +        params_path = args.params
520 | 517 |     else:
521 | | -        # For huggingface decoder model, we need to convert config to match the keys
522 | | -        model_id = HUGGING_FACE_REPO_IDS[args.decoder_model]
523 | | -        kv_config = AutoConfig.from_pretrained(model_id)
524 | | -        kv_config = convert_configs(kv_config)
525 | | -
526 | | -        if args.decoder_model == "qwen2_5":
527 | | -            kv_config.attention_qkv_bias = True
| 518 | +        if args.decoder_model == "qwen2_5":
| 519 | +            cur_dir = os.path.dirname(__file__)
| 520 | +            params_path = os.path.join(
| 521 | +                cur_dir,
| 522 | +                "..",
| 523 | +                "..",
| 524 | +                "..",
| 525 | +                "models",
| 526 | +                "qwen2_5",
| 527 | +                "config",
| 528 | +                "0_5b_config.json",
| 529 | +            )
| 530 | +    with open(params_path) as f:
| 531 | +        kv_config = ModelArgs(**json.load(f))
528 | 532 |

529 | | -    if not hasattr(kv_config, "head_dim"):
530 | | -        kv_config.head_dim = kv_config.dim // kv_config.n_heads
531 | 533 |     # TODO: support batch inputs if necessary
532 | 534 |     kv_config.max_batch_size = 1
533 | 535 |     kv_config.max_seq_len = args.max_seq_len
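
Note: after this hunk, `args.params` only supplies a path, and both branches funnel into the same `ModelArgs(**json.load(f))` call after the if/else, so the bundled qwen2_5 config and a user-supplied params file are loaded identically. A minimal standalone sketch of the fallback resolution, assuming the directory layout implied by the diff (the commented config keys are illustrative, not the actual contents of 0_5b_config.json):

import json
import os

# Resolve the bundled Qwen2.5 0.5B config relative to this script,
# mirroring the os.path.join chain in the hunk above.
cur_dir = os.path.dirname(os.path.abspath(__file__))
params_path = os.path.normpath(
    os.path.join(
        cur_dir, "..", "..", "..", "models", "qwen2_5", "config", "0_5b_config.json"
    )
)

with open(params_path) as f:
    params = json.load(f)
# The JSON keys must line up with ModelArgs fields, e.g. (hypothetical values):
# {"dim": 896, "n_heads": 14, "n_layers": 24, "vocab_size": 151936, ...}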
@@ -1235,18 +1237,13 @@ def export_llama(args) -> None: |
1235 | 1237 |         with open(runtime_tokenizer_path, "r+") as file:
1236 | 1238 |             data = json.load(file)
1237 | 1239 |             # TODO: Encountered the following error during runtime, so switched behavior for now.
1238 | | -            # Error: libc++abi: terminating due to uncaught exception of type std::runtime_error:
1239 | | -            # Unsupported behavior 'Isolated' for Split PreTokenizer. Only 'MergedWithPrevious' is supported.
1240 | | -            behavior = data["pre_tokenizer"]["pretokenizers"][0]["behavior"]
1241 | | -            if behavior == "Isolated":
1242 | | -                data["pre_tokenizer"]["pretokenizers"][0][
1243 | | -                    "behavior"
1244 | | -                ] = "MergedWithPrevious"
1245 | | -                file.seek(0)
1246 | | -                json.dump(data, file, indent=4)
1247 | | -                file.truncate()
| 1240 | +            # Error: libc++abi: terminating due to uncaught exception of type std::runtime_error: Unsupported Normalizer type: NFC.
| 1241 | +            data.pop("normalizer")
| 1242 | +            file.seek(0)
| 1243 | +            json.dump(data, file, indent=4)
| 1244 | +            file.truncate()
1248 | 1245 |     else:
1249 | | -        raise RuntimeError(f"Unknown decoder_model: {args.llama_model}.")
| 1246 | +        raise RuntimeError(f"Unknown decoder_model: {args.decoder_model}.")
1250 | 1247 |

1251 | 1248 |     if args.kv_updater == "smart_mask":
1252 | 1249 |         args.shared_buffer = True
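
For context on this hunk: the Qwen2.5 tokenizer.json carries a top-level "normalizer" entry (an NFC normalizer) that the runtime tokenizer cannot parse, so instead of rewriting the Split pre-tokenizer behavior as before, the patch now drops the normalizer section entirely and rewrites the file in place. A hedged sketch of the same patch as a standalone helper; patch_tokenizer_json is a hypothetical name, not part of this script:

import json

def patch_tokenizer_json(path: str) -> None:
    # Remove the normalizer section (e.g. {"type": "NFC"}) that the
    # runtime tokenizer rejects, rewriting the JSON file in place.
    with open(path, "r+") as file:
        data = json.load(file)
        data.pop("normalizer", None)  # tolerate files that lack the key
        file.seek(0)
        json.dump(data, file, indent=4)
        file.truncate()  # drop leftover bytes if the original was longer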
|