fix: use torch instead of numpy to resolve device mismatch bug

tjohnson31415 · njhill · commit f4060c075886 · 2023-11-14T18:29:13.000-08:00
Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
@@ -2,7 +2,6 @@
 import time
 from operator import itemgetter
 
-import numpy as np
 import torch
 
 from dataclasses import dataclass
@@ -163,15 +162,11 @@ def from_pb(
 
         # Padded all_input_ids_tensor; the maximum length of any sequence is the max
         # (padded) input sequence length + the max output length
-        all_input_ids_tensor = np.full(
+        all_input_ids_tensor = torch.full(
             (batch_size, tokenize_length + padding_right_offset),
             tokenizer.pad_token_id,
         )
         all_input_ids_tensor[:, :all_input_ids.shape[1]] = all_input_ids
-        # Create tensors on device
-        all_input_ids_tensor = all_input_ids.new_tensor(
-            all_input_ids_tensor,
-        )
 
         if prefix_ids:
             # Get input embeddings