We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6185106 · commit 9f34c1b (Copy full SHA for 9f34c1b)
INFERENCE.md
@@ -62,7 +62,7 @@ model.generation_config.cache_implementation = "static"
62
model.forward = torch.compile(model.forward, mode=compile_mode)
63
64
# warmup
65
-inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(device)
+inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(torch_device)
66
67
model_kwargs = {**inputs, "prompt_input_ids": inputs.input_ids, "prompt_attention_mask": inputs.attention_mask, }
68
0 commit comments