
Commit 6af0778

Enable xpu device (#1736)
This PR enables the GPTQ example on Intel XPU.

Signed-off-by: jiqing-feng <[email protected]>
Co-authored-by: Brian Dellabetta <[email protected]>
1 parent ec07a83 · commit 6af0778

40 files changed (+97, -43 lines)
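
The pattern repeated across these files is replacing a hardcoded `"cuda"` target with `model.device`, so each example runs unchanged on CUDA, Intel XPU, or CPU. Below is a minimal sketch of that device-agnostic pattern; the model stub is an illustrative placeholder, not taken from this diff:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder stub for illustration; the examples in this commit use their own models.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    device_map="auto",  # let accelerate place weights on CUDA, XPU, or CPU
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Move inputs to wherever the model actually lives instead of assuming "cuda".
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
    model.device
)
output = model.generate(input_ids, max_new_tokens=100)
print(tokenizer.decode(output[0]))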

examples/awq/llama_example.py
Lines changed: 3 additions & 1 deletion

@@ -66,7 +66,9 @@ def tokenize(sample):
 print("\n\n")
 print("========== SAMPLE GENERATION ==============")
 dispatch_for_generation(model)
-input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
+input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
+    model.device
+)
 output = model.generate(input_ids, max_new_tokens=100)
 print(tokenizer.decode(output[0]))
 print("==========================================\n\n")

examples/awq/qwen3_moe_example.py
Lines changed: 3 additions & 1 deletion

@@ -71,7 +71,9 @@ def tokenize(sample):
 print("\n\n")
 print("========== SAMPLE GENERATION ==============")
 dispatch_for_generation(model)
-input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to("cuda")
+input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(
+    model.device
+)
 output = model.generate(input_ids, max_new_tokens=100)
 print(tokenizer.decode(output[0]))
 print("==========================================\n\n")

examples/big_models_with_sequential_onloading/README.md
Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ During `oneshot`, only one gpu is required which will be used to onload each layer
 ```python
 dispatch_for_generation(model)
 sample = tokenizer("Hello my name is", return_tensors="pt")
-sample = {key: value.to("cuda") for key, value in sample.items()}
+sample = {key: value.to(model.device) for key, value in sample.items()}
 output = model.generate(**sample, max_new_tokens=100)
 print(tokenizer.decode(output[0]))
 ```

examples/big_models_with_sequential_onloading/llama3.3_70b.py
Lines changed: 1 addition & 1 deletion

@@ -76,7 +76,7 @@ def tokenize(sample):
 print("========== SAMPLE GENERATION ==============")
 dispatch_for_generation(model)
 sample = tokenizer("Hello my name is", return_tensors="pt")
-sample = {key: value.to("cuda") for key, value in sample.items()}
+sample = {key: value.to(model.device) for key, value in sample.items()}
 output = model.generate(**sample, max_new_tokens=100)
 print(tokenizer.decode(output[0]))
 print("==========================================\n\n")

examples/compressed_inference/fp8_compressed_inference.py
Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@
 compressed_model = AutoModelForCausalLM.from_pretrained(
     MODEL_STUB,
     torch_dtype="auto",
-    device_map="cuda:0",
+    device_map="auto",
 )

 # tokenize the sample data
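
The change above drops the hardcoded `"cuda:0"` in favor of `device_map="auto"`, which lets accelerate dispatch the model to whatever accelerator is present. When a script needs an explicit device handle rather than automatic dispatch, a selection helper along these lines works; the helper name is illustrative, not from this commit:

import torch

def pick_device() -> torch.device:
    """Illustrative helper: prefer CUDA, then Intel XPU, then fall back to CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.device("xpu")
    return torch.device("cpu")

device = pick_device()

Recent PyTorch builds expose `torch.xpu` for Intel GPUs; the `hasattr` guard keeps the helper safe on older versions.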

examples/multimodal_vision/gemma3_example.py
Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ def data_collator(batch):
 raw_image = Image.open(requests.get(image_url, stream=True).raw)

 # Note: compile is disabled: https://github.com/huggingface/transformers/issues/38333
-inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
+inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device)
 output = model.generate(**inputs, max_new_tokens=100, disable_compile=True)
 print(processor.decode(output[0], skip_special_tokens=True))
 print("==========================================")

examples/multimodal_vision/idefics3_example.py
Lines changed: 1 addition & 1 deletion

@@ -109,7 +109,7 @@ def tokenize(sample):
 image_url = "http://images.cocodataset.org/train2017/000000231895.jpg"
 raw_image = Image.open(requests.get(image_url, stream=True).raw)

-inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
+inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device)
 output = model.generate(**inputs, max_new_tokens=100)
 print(processor.decode(output[0], skip_special_tokens=True))
 print("==========================================")

examples/multimodal_vision/llava_example.py
Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ def data_collator(batch):
 image_url = "http://images.cocodataset.org/train2017/000000231895.jpg"
 raw_image = Image.open(requests.get(image_url, stream=True).raw)

-inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
+inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device)
 output = model.generate(**inputs, max_new_tokens=100)
 print(processor.decode(output[0], skip_special_tokens=True))
 print("==========================================")

examples/multimodal_vision/mistral3_example.py
Lines changed: 1 addition & 1 deletion

@@ -77,7 +77,7 @@ def data_collator(batch):
 image_url = "http://images.cocodataset.org/train2017/000000231895.jpg"
 raw_image = Image.open(requests.get(image_url, stream=True).raw)

-inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
+inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device)
 inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)  # fix dtype
 output = model.generate(**inputs, max_new_tokens=100)
 print(processor.decode(output[0], skip_special_tokens=True))

examples/multimodal_vision/mllama_example.py
Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ def data_collator(batch):
 image_url = "http://images.cocodataset.org/train2017/000000231895.jpg"
 raw_image = Image.open(requests.get(image_url, stream=True).raw)

-inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to("cuda")
+inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(model.device)
 output = model.generate(**inputs, max_new_tokens=100)
 print(processor.decode(output[0], skip_special_tokens=True))
 print("==========================================")
