We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b17216c · commit ecffd3d — Copy full SHA for ecffd3d
examples/gpu/llm/inference/run_generation_with_deepspeed.py
@@ -201,7 +201,8 @@ def print_mem_usage(msg):
201
tokenizer = model_class[1].from_pretrained(model_name)
202
config = AutoConfig.from_pretrained(model_name, torchscript=args.jit)
203
# Avoid deepspeed tp>=2 lm_head weight reload. This does not affect the results.
204
-config.tie_word_embeddings = False
+if not args.disable_optimize_transformers:
205
+ config.tie_word_embeddings = False
206
#if not hasattr(config, "text_max_length") and args.prompt is None:
207
# config.text_max_length = int(args.input_tokens) + int(args.max_new_tokens)
208
print_rank0("*** model config:", config)
0 commit comments