Add Intel GPU config (PaddlePaddle#4992)

Bobholamovic · TingquanGao · commit 00a7fddcf3fb · 2026-03-25T14:36:08.000Z
diff --git a/paddlex/inference/genai/configs/paddleocr_vl_09b.py b/paddlex/inference/genai/configs/paddleocr_vl_09b.py
@@ -45,13 +45,26 @@ def get_config(backend):
             cfg["max-concurrency"] = 2048
         return cfg
     elif backend == "vllm":
-        return {
-            "trust-remote-code": True,
-            "gpu-memory-utilization": 0.5,
-            "max-model-len": 16384,
-            "max-num-batched-tokens": 131072,
-            "api-server-count": 4,
-        }
+        require_deps("torch")
+
+        import torch
+
+        if torch.xpu.is_available():
+            return {
+                "trust-remote-code": True,
+                "max-num-batched-tokens": 16384,
+                "no-enable-prefix-caching": True,
+                "mm-processor-cache-gb": 0,
+                "enforce-eager": True,
+            }
+        else:
+            return {
+                "trust-remote-code": True,
+                "gpu-memory-utilization": 0.5,
+                "max-model-len": 16384,
+                "max-num-batched-tokens": 131072,
+                "api-server-count": 4,
+            }
     elif backend == "sglang":
         return {
             "trust-remote-code": True,