diff --git a/examples/arrow_multitask/arrow_phi3_mini.py b/examples/arrow_multitask/arrow_phi3_mini.py
index 8a9e01eb5f..13f7fcad58 100644
--- a/examples/arrow_multitask/arrow_phi3_mini.py
+++ b/examples/arrow_multitask/arrow_phi3_mini.py
@@ -186,7 +186,10 @@ def set_seed(seed: int):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    elif hasattr(torch, "xpu") and torch.xpu.is_available():
+        torch.xpu.manual_seed_all(seed)
 
 
 def compute_loglike_loss(logits, labels, reduction="none"):
@@ -215,11 +218,16 @@ def compute_loglike_loss(logits, labels, reduction="none"):
 
 
 def evaluate_on_multi_choice_batched(
-    eval_dataset, model, tokenizer, ds_name, labels, predictions, args, batch_size=32, max_length=512, device="cuda"
+    eval_dataset, model, tokenizer, ds_name, labels, predictions, args, batch_size=32, max_length=512, device="auto"
 ):
     # Local import to mirror your original function
     model.eval()
 
+    if device == "auto":
+        device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+    else:
+        device = torch.device(device)
+
     for start in tqdm(
         range(0, len(eval_dataset), batch_size), total=(len(eval_dataset) + batch_size - 1) // batch_size
     ):
@@ -326,7 +334,7 @@ def evaluate_on_multi_choice_batched(
                 args,
                 batch_size=64,  # tune this
                 max_length=512,  # tune if options are long
-                device="cuda",
+                device="auto",
             )
     else:
         general_adapter_paths = []
@@ -371,5 +379,5 @@ def evaluate_on_multi_choice_batched(
                 args,
                 batch_size=32,  # tune this
                 max_length=512,  # tune if options are long
-                device="cuda",
+                device="auto",
             )