2 files changed: +4 −5 lines

examples/quantization_w8a8_fp8

@@ -14,13 +14,13 @@
 MODEL_ID = "ibm-granite/granite-4.0-tiny-preview"
 
 # Load model.
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID, torch_dtype="bfloat16", device_map="auto"
-)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 skip_router_only = True  # assume we want to quantize input/output moe layers
-ignore_lay = ["lm_head",]
+ignore_lay = [
+    "lm_head",
+]
 if skip_router_only:
     # swap moe linears to a custom class
     for n, m in model.named_modules():
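The hunk is truncated at the named_modules() loop, so the swap itself is not shown. Below is a minimal sketch of how a module swap of this shape typically works; it is illustrative, not the example script's actual code. MoELinearWrapper, the "router" name check, and the setattr-based replacement are all hypothetical stand-ins.

import torch
from transformers import AutoModelForCausalLM

# Hypothetical stand-in for the custom class the example swaps in.
class MoELinearWrapper(torch.nn.Module):
    def __init__(self, linear: torch.nn.Linear):
        super().__init__()
        self.inner = linear

    def forward(self, x):
        return self.inner(x)

model = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-4.0-tiny-preview", torch_dtype="auto"
)

# Snapshot the target names first: replacing modules while iterating
# named_modules() would mutate the structure being traversed.
targets = [
    name
    for name, module in model.named_modules()
    if isinstance(module, torch.nn.Linear) and "router" not in name
]
for name in targets:
    parent_name, _, child_name = name.rpartition(".")
    parent = model.get_submodule(parent_name) if parent_name else model
    setattr(parent, child_name, MoELinearWrapper(getattr(parent, child_name)))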
src/llmcompressor/modeling

@@ -86,4 +86,3 @@ def __repr__(self):
             f"in={self.weight.shape[2]})"
         )
         return f"{self.__class__.__name__}{sizes_str}"
-
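For context on this second hunk (which only removes a trailing blank line): the shape[2] index implies a 3-D weight, i.e. expert weights stacked into a single tensor. A self-contained sketch of the same __repr__ pattern, with a hypothetical class name and field layout assumed to be (num_experts, out_features, in_features):

import torch

class StackedExpertLinear(torch.nn.Module):
    # Hypothetical module holding all expert weights in one 3-D tensor
    # of shape (num_experts, out_features, in_features).
    def __init__(self, num_experts: int, out_features: int, in_features: int):
        super().__init__()
        self.weight = torch.nn.Parameter(
            torch.empty(num_experts, out_features, in_features)
        )

    def __repr__(self):
        # Same pattern as the hunk: summarize the weight shape, then
        # prepend the class name.
        sizes_str = (
            f"(experts={self.weight.shape[0]}, "
            f"out={self.weight.shape[1]}, "
            f"in={self.weight.shape[2]})"
        )
        return f"{self.__class__.__name__}{sizes_str}"

print(StackedExpertLinear(8, 1024, 512))
# -> StackedExpertLinear(experts=8, out=1024, in=512)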