
Commit 929f678

update examples, remove offload devicemap utils
Signed-off-by: Kyle Sayers <[email protected]>
1 parent 6f86244

39 files changed (+55, -579 lines)
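
The recurring edit across the example scripts below is to drop device_map from each from_pretrained call and load with torch_dtype="auto" only. A minimal, self-contained sketch of the updated loading pattern (the model ID mirrors the Llama example below; this is ordinary transformers usage, not a new API):

# Sketch of the loading pattern the updated examples share (illustrative).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# No device_map argument: the model loads with transformers' default placement.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)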

examples/awq/llama_example.py

Lines changed: 4 additions & 6 deletions

@@ -5,12 +5,10 @@
 from llmcompressor.modifiers.awq import AWQModifier
 
 # Select model and load it.
-MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto"
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 
 # Select calibration dataset.
 DATASET_ID = "mit-han-lab/pile-val-backup"
@@ -72,6 +70,6 @@ def tokenize(sample):
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[-1] + "-awq-asym"
+SAVE_DIR = model_id.split("/")[-1] + "-awq-asym"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)

examples/awq/qwen3_moe_example.py

Lines changed: 4 additions & 7 deletions

@@ -5,12 +5,9 @@
 from llmcompressor.modifiers.awq import AWQModifier
 
 # Select model and load it.
-MODEL_ID = "Qwen/Qwen3-30B-A3B"
-
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto"
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model_id = "Qwen/Qwen3-30B-A3B"
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 
 # Select calibration dataset.
 DATASET_ID = "mit-han-lab/pile-val-backup"
@@ -77,6 +74,6 @@ def tokenize(sample):
 print("==========================================\n\n")
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[-1] + "-awq-sym"
+SAVE_DIR = model_id.split("/")[-1] + "-awq-sym"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)

examples/big_models_with_accelerate/README.md

Lines changed: 0 additions & 95 deletions
This file was deleted.

examples/big_models_with_accelerate/cpu_offloading_fp8.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

examples/big_models_with_accelerate/mult_gpus_int8_device_map.py

Lines changed: 0 additions & 81 deletions
This file was deleted.

examples/big_models_with_accelerate/multi_gpu_int8.py

Lines changed: 0 additions & 78 deletions
This file was deleted.

examples/compressed_inference/fp8_compressed_inference.py

Lines changed: 1 addition & 5 deletions

@@ -19,11 +19,7 @@
     "def fibonacci(n):",
 ]
 
-compressed_model = AutoModelForCausalLM.from_pretrained(
-    MODEL_STUB,
-    torch_dtype="auto",
-    device_map="cuda:0",
-)
+compressed_model = AutoModelForCausalLM.from_pretrained(MODEL_STUB, torch_dtype="auto")
 
 # tokenize the sample data
 tokenizer = AutoTokenizer.from_pretrained(MODEL_STUB)
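
After loading, such a compressed checkpoint is exercised like any other causal LM. A hedged sketch of that step (standard transformers generation; the prompt comes from the diff context above, and max_new_tokens is an illustrative choice, not necessarily the example's exact code):

# Assumes compressed_model and tokenizer from the example above (illustrative usage).
inputs = tokenizer("def fibonacci(n):", return_tensors="pt").to(compressed_model.device)
output = compressed_model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))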

examples/multimodal_audio/whisper_example.py

Lines changed: 4 additions & 9 deletions

@@ -6,15 +6,10 @@
 from llmcompressor.modifiers.quantization import GPTQModifier
 
 # Select model and load it.
-MODEL_ID = "openai/whisper-large-v3"
-
-model = WhisperForConditionalGeneration.from_pretrained(
-    MODEL_ID,
-    device_map="auto",
-    torch_dtype="auto",
-)
+model_id = "openai/whisper-large-v3"
+model = WhisperForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
 model.config.forced_decoder_ids = None
-processor = WhisperProcessor.from_pretrained(MODEL_ID)
+processor = WhisperProcessor.from_pretrained(model_id)
 
 # Configure processor the dataset task.
 processor.tokenizer.set_prefix_tokens(language="en", task="transcribe")
@@ -106,6 +101,6 @@ def data_collator(batch):
 # and it was a great thing for what it was at the time but it's not a passive house
 
 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-W4A16-G128"
+SAVE_DIR = model_id.split("/")[1] + "-W4A16-G128"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 processor.save_pretrained(SAVE_DIR)

examples/multimodal_vision/gemma3_example.py

Lines changed: 1 addition & 3 deletions

@@ -8,9 +8,7 @@
 
 # Load model.
 model_id = "google/gemma-3-4b-it"
-model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id, device_map="auto", torch_dtype="auto"
-)
+model = Gemma3ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
 # Oneshot arguments

examples/multimodal_vision/idefics3_example.py

Lines changed: 1 addition & 3 deletions

@@ -9,9 +9,7 @@
 
 # Load model.
 model_id = "HuggingFaceM4/Idefics3-8B-Llama3"  # or "HuggingFaceTB/SmolVLM-Instruct"
-model = Idefics3ForConditionalGeneration.from_pretrained(
-    model_id, device_map="auto", torch_dtype="auto"
-)
+model = Idefics3ForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
 # Oneshot arguments
