We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6185106 · commit 9f34c1b (Copy full SHA for 9f34c1b)
INFERENCE.md
@@ -62,7 +62,7 @@ model.generation_config.cache_implementation = "static"
62
model.forward = torch.compile(model.forward, mode=compile_mode)
63
64
# warmup
65
-inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(device)
+inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(torch_device)
66
67
model_kwargs = {**inputs, "prompt_input_ids": inputs.input_ids, "prompt_attention_mask": inputs.attention_mask, }
68
0 commit comments