Skip to content

Commit 2bf60aa

Browse files
remove device mapping in gr4_ex.py
Signed-off-by: cliu-us <[email protected]>
1 parent 92a5c17 commit 2bf60aa

File tree

2 files changed

+4
-5
lines changed

2 files changed

+4
-5
lines changed

examples/quantization_w8a8_fp8/granite4_example.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,13 @@
1414
MODEL_ID = "ibm-granite/granite-4.0-tiny-preview"
1515

1616
# Load model.
17-
model = AutoModelForCausalLM.from_pretrained(
18-
MODEL_ID, torch_dtype="bfloat16", device_map="auto"
19-
)
17+
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
2018
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
2119

2220
skip_router_only = True # assume we want to quantize input/output moe layers
23-
ignore_lay = ["lm_head",]
21+
ignore_lay = [
22+
"lm_head",
23+
]
2424
if skip_router_only:
2525
# swap moe linears to a custom class
2626
for n, m in model.named_modules():

src/llmcompressor/modeling/granite4.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,4 +86,3 @@ def __repr__(self):
8686
f"in={self.weight.shape[2]})"
8787
)
8888
return f"{self.__class__.__name__}{sizes_str}"
89-

0 commit comments

Comments
 (0)