Commit 2b83fdd

feat: Add XPU device support for Intel GPUs (#2809)
* feat: Add XPU device support
* feat: Add XPU as supported device on some models
* docs: Add XPU usage to examples
* DCO Remediation Commit for jspast <[email protected]>

  I, jspast <[email protected]>, hereby add my Signed-off-by to this commit: f26e8b8
  I, jspast <[email protected]>, hereby add my Signed-off-by to this commit: a4a2bf9
  I, jspast <[email protected]>, hereby add my Signed-off-by to this commit: a2d5dac

  Signed-off-by: jspast <[email protected]>

---------

Signed-off-by: jspast <[email protected]>
1 parent 3ef4525 commit 2b83fdd
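
The change threads Intel GPU ("xpu") support through docling's accelerator options, device resolution, and model specs. As a quick orientation, here is a minimal sketch (not part of the commit) of how a caller might opt into the new device, assuming a PyTorch build that exposes the torch.xpu backend:

import torch

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions

# Prefer XPU when an Intel GPU is visible to PyTorch, otherwise let docling decide.
if torch.xpu.is_available():
    accelerator_options = AcceleratorOptions(num_threads=8, device=AcceleratorDevice.XPU)
else:
    accelerator_options = AcceleratorOptions(num_threads=8, device=AcceleratorDevice.AUTO)

The per-file diffs below show where the new AcceleratorDevice.XPU member is accepted and propagated.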

11 files changed: +75 / -20 lines changed

docling/datamodel/accelerator_options.py

Lines changed: 3 additions & 2 deletions
@@ -17,6 +17,7 @@ class AcceleratorDevice(str, Enum):
     CPU = "cpu"
     CUDA = "cuda"
     MPS = "mps"
+    XPU = "xpu"


 class AcceleratorOptions(BaseSettings):
@@ -30,13 +31,13 @@ class AcceleratorOptions(BaseSettings):

     @field_validator("device")
     def validate_device(cls, value):
-        # "auto", "cpu", "cuda", "mps", or "cuda:N"
+        # "auto", "cpu", "cuda", "mps", "xpu", or "cuda:N"
         if value in {d.value for d in AcceleratorDevice} or re.match(
             r"^cuda(:\d+)?$", value
         ):
             return value
         raise ValueError(
-            "Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
+            "Invalid device option. Use 'auto', 'cpu', 'mps', 'xpu', 'cuda', or 'cuda:N'."
         )

     @model_validator(mode="before")
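
To illustrate the widened validator, a small sketch (illustrative only; "npu" is just an example of a rejected value):

from pydantic import ValidationError

from docling.datamodel.accelerator_options import AcceleratorOptions

opts = AcceleratorOptions(device="xpu")       # "xpu" is now accepted alongside "auto", "cpu", "mps", "cuda"
opts_n = AcceleratorOptions(device="cuda:1")  # the "cuda:N" regex branch is unchanged

try:
    AcceleratorOptions(device="npu")          # hypothetical invalid value
except ValidationError as err:
    print(err)                                # the message now lists 'xpu' among the valid options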

docling/datamodel/layout_model_specs.py

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ class LayoutModelConfig(BaseModel):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     @property

docling/datamodel/pipeline_options_asr_model.py

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ class InlineAsrOptions(BaseAsrOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     @property

docling/datamodel/pipeline_options_vlm_model.py

Lines changed: 1 addition & 0 deletions
@@ -93,6 +93,7 @@ class InlineVlmOptions(BaseVlmOptions):
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ]

     stop_strings: List[str] = []

docling/datamodel/vlm_model_specs.py

Lines changed: 20 additions & 2 deletions
@@ -28,6 +28,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     extra_generation_config=dict(skip_special_tokens=False),
     scale=2.0,
@@ -92,6 +93,7 @@
     supported_devices=[
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -106,6 +108,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -123,6 +126,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     torch_dtype="bfloat16",
     scale=2.0,
@@ -150,6 +154,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -167,6 +172,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -179,6 +185,7 @@
     inference_framework=InferenceFramework.VLLM,
     supported_devices=[
         AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -201,7 +208,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_VISION2SEQ,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
@@ -224,7 +235,11 @@
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_CAUSALLM,
-    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+    supported_devices=[
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
     extra_generation_config=dict(num_logits_to_keep=0),
@@ -253,6 +268,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         # AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -295,6 +311,7 @@
         AcceleratorDevice.CUDA,
         AcceleratorDevice.CPU,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
@@ -313,6 +330,7 @@
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
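
These spec updates only extend the supported_devices lists. A hypothetical snippet (not part of the commit) to check which bundled VLM specs now advertise XPU, reusing the InlineVlmOptions class touched above:

from docling.datamodel import vlm_model_specs
from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions

# Scan the module for inline specs whose supported_devices include the new member.
xpu_ready = [
    name
    for name, spec in vars(vlm_model_specs).items()
    if isinstance(spec, InlineVlmOptions)
    and AcceleratorDevice.XPU in spec.supported_devices
]
print(xpu_ready)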

docling/models/code_formula_model.py

Lines changed: 5 additions & 1 deletion
@@ -96,7 +96,11 @@ def __init__(
         if self.enabled:
             self.device = decide_device(
                 accelerator_options.device,
-                supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
+                supported_devices=[
+                    AcceleratorDevice.CPU,
+                    AcceleratorDevice.CUDA,
+                    AcceleratorDevice.XPU,
+                ],
             )

             if artifacts_path is None:

docling/utils/accelerator_utils.py

Lines changed: 14 additions & 0 deletions
@@ -22,6 +22,7 @@ def decide_device(

     has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
     has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    has_xpu = torch.xpu.is_available()

     if supported_devices is not None:
         if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
@@ -34,12 +35,19 @@
                 f"Removing MPS from available devices because it is not in {supported_devices=}"
             )
             has_mps = False
+        if has_xpu and AcceleratorDevice.XPU not in supported_devices:
+            _log.info(
+                f"Removing XPU from available devices because it is not in {supported_devices=}"
+            )
+            has_xpu = False

     if accelerator_device == AcceleratorDevice.AUTO.value:  # Handle 'auto'
         if has_cuda:
             device = "cuda:0"
         elif has_mps:
             device = "mps"
+        elif has_xpu:
+            device = "xpu"

     elif accelerator_device.startswith("cuda"):
         if has_cuda:
@@ -71,6 +79,12 @@
         else:
             _log.warning("MPS is not available in the system. Fall back to 'CPU'")

+    elif accelerator_device == AcceleratorDevice.XPU.value:
+        if has_xpu:
+            device = "xpu"
+        else:
+            _log.warning("XPU is not available in the system. Fall back to 'CPU'")
+
     elif accelerator_device == AcceleratorDevice.CPU.value:
         device = "cpu"

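
With this change, "auto" resolution prefers CUDA, then MPS, then XPU, then CPU, and an explicit "xpu" request falls back to CPU with a warning when PyTorch reports no XPU device. A hedged usage sketch of decide_device as shown above:

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.utils.accelerator_utils import decide_device

# "auto": picks cuda:0, then mps, then xpu, then cpu, depending on availability.
device = decide_device("auto")

# Explicit request: honoured only if torch reports an XPU device and XPU is listed
# in supported_devices; otherwise the helper warns and returns "cpu".
device = decide_device(
    AcceleratorDevice.XPU.value,
    supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.XPU],
)
print(device)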

docs/examples/compare_vlm_models.py

Lines changed: 10 additions & 2 deletions
@@ -141,7 +141,11 @@ def convert(sources: list[Path], converter: DocumentConverter):
     response_format=ResponseFormat.MARKDOWN,
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
-    supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+    supported_devices=[
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )
@@ -154,7 +158,11 @@ def convert(sources: list[Path], converter: DocumentConverter):
     inference_framework=InferenceFramework.TRANSFORMERS,
     transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
     transformers_prompt_style=TransformersPromptStyle.RAW,
-    supported_devices=[AcceleratorDevice.CUDA, AcceleratorDevice.CPU],
+    supported_devices=[
+        AcceleratorDevice.CUDA,
+        AcceleratorDevice.CPU,
+        AcceleratorDevice.XPU,
+    ],
     scale=2.0,
     temperature=0.0,
 )

docs/examples/run_with_accelerator.py

Lines changed: 7 additions & 4 deletions
@@ -1,19 +1,19 @@
 # %% [markdown]
-# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA).
+# Run conversion with an explicit accelerator configuration (CPU/MPS/CUDA/XPU).
 #
 # What this example does
 # - Shows how to select the accelerator device and thread count.
 # - Enables OCR and table structure to exercise compute paths, and prints timings.
 #
 # How to run
 # - From the repo root: `python docs/examples/run_with_accelerator.py`.
-# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA.
+# - Toggle the commented `AcceleratorOptions` examples to try AUTO/MPS/CUDA/XPU.
 #
 # Notes
 # - EasyOCR does not support `cuda:N` device selection (defaults to `cuda:0`).
 # - `settings.debug.profile_pipeline_timings = True` prints profiling details.
-# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` requires a compatible GPU and
-#   CUDA-enabled PyTorch build. CPU mode works everywhere.
+# - `AcceleratorDevice.MPS` is macOS-only; `CUDA` and `XPU` require a compatible GPU and
+#   CUDA/XPU-enabled PyTorch build. CPU mode works everywhere.

 # %%

@@ -43,6 +43,9 @@ def main():
     #     num_threads=8, device=AcceleratorDevice.MPS
     # )
     # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.XPU
+    # )
+    # accelerator_options = AcceleratorOptions(
     #     num_threads=8, device=AcceleratorDevice.CUDA
     # )

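
For context, a sketch of how the toggled option plugs into the rest of that example, mirroring the converter wiring already used in docs/examples/run_with_accelerator.py (the XPU line is the newly added toggle):

from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

accelerator_options = AcceleratorOptions(num_threads=8, device=AcceleratorDevice.XPU)

# Enable OCR and table structure so the accelerator actually gets exercised.
pipeline_options = PdfPipelineOptions()
pipeline_options.accelerator_options = accelerator_options
pipeline_options.do_ocr = True
pipeline_options.do_table_structure = True

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
    }
)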

docs/usage/vision_models.md

Lines changed: 1 addition & 0 deletions
@@ -105,6 +105,7 @@ pipeline_options = VlmPipelineOptions(
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
         AcceleratorDevice.MPS,
+        AcceleratorDevice.XPU,
     ],
     scale=2.0,
     temperature=0.0,
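
A sketch of pinning the VLM pipeline to XPU; the spec name SMOLDOCLING_TRANSFORMERS and the VlmPipeline wiring are assumptions based on the existing docling docs, not part of this diff:

from docling.datamodel import vlm_model_specs
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_model_specs.SMOLDOCLING_TRANSFORMERS,  # assumed spec name
    accelerator_options=AcceleratorOptions(device=AcceleratorDevice.XPU),
)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
        )
    }
)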
