We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b17216c · commit ecffd3d — Copy full SHA for ecffd3d
examples/gpu/llm/inference/run_generation_with_deepspeed.py
@@ -201,7 +201,8 @@ def print_mem_usage(msg):
201
tokenizer = model_class[1].from_pretrained(model_name)
202
config = AutoConfig.from_pretrained(model_name, torchscript=args.jit)
203
# Avoid deepspeed tp>=2 lm_head weight reload. This does not affect the results.
204
-config.tie_word_embeddings = False
+if not args.disable_optimize_transformers:
205
+ config.tie_word_embeddings = False
206
#if not hasattr(config, "text_max_length") and args.prompt is None:
207
# config.text_max_length = int(args.input_tokens) + int(args.max_new_tokens)
208
print_rank0("*** model config:", config)
0 commit comments