2 files changed: +4 −5 lines

examples/quantization_w8a8_fp8

@@ -14,13 +14,13 @@
 MODEL_ID = "ibm-granite/granite-4.0-tiny-preview"
 
 # Load model.
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID, torch_dtype="bfloat16", device_map="auto"
-)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 skip_router_only = True  # assume we want to quantize input/output moe layers
-ignore_lay = ["lm_head",]
+ignore_lay = [
+    "lm_head",
+]
 if skip_router_only:
     # swap moe linears to a custom class
     for n, m in model.named_modules():
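The hunk is truncated at the named_modules() loop, so the swap itself is not shown. Below is a minimal sketch of how a module swap of this shape typically works; it is illustrative, not the example script's actual code. MoELinearWrapper, the "router" name check, and the setattr-based replacement are all hypothetical stand-ins.

import torch
from transformers import AutoModelForCausalLM

# Hypothetical stand-in for the custom class the example swaps in.
class MoELinearWrapper(torch.nn.Module):
    def __init__(self, linear: torch.nn.Linear):
        super().__init__()
        self.inner = linear

    def forward(self, x):
        return self.inner(x)

model = AutoModelForCausalLM.from_pretrained(
    "ibm-granite/granite-4.0-tiny-preview", torch_dtype="auto"
)

# Snapshot the target names first: replacing modules while iterating
# named_modules() would mutate the structure being traversed.
targets = [
    name
    for name, module in model.named_modules()
    if isinstance(module, torch.nn.Linear) and "router" not in name
]
for name in targets:
    parent_name, _, child_name = name.rpartition(".")
    parent = model.get_submodule(parent_name) if parent_name else model
    setattr(parent, child_name, MoELinearWrapper(getattr(parent, child_name)))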
src/llmcompressor/modeling

@@ -86,4 +86,3 @@ def __repr__(self):
             f"in={self.weight.shape[2]})"
         )
         return f"{self.__class__.__name__}{sizes_str}"
-
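For context on this second hunk (which only removes a trailing blank line): the shape[2] index implies a 3-D weight, i.e. expert weights stacked into a single tensor. A self-contained sketch of the same __repr__ pattern, with a hypothetical class name and field layout assumed to be (num_experts, out_features, in_features):

import torch

class StackedExpertLinear(torch.nn.Module):
    # Hypothetical module holding all expert weights in one 3-D tensor
    # of shape (num_experts, out_features, in_features).
    def __init__(self, num_experts: int, out_features: int, in_features: int):
        super().__init__()
        self.weight = torch.nn.Parameter(
            torch.empty(num_experts, out_features, in_features)
        )

    def __repr__(self):
        # Same pattern as the hunk: summarize the weight shape, then
        # prepend the class name.
        sizes_str = (
            f"(experts={self.weight.shape[0]}, "
            f"out={self.weight.shape[1]}, "
            f"in={self.weight.shape[2]})"
        )
        return f"{self.__class__.__name__}{sizes_str}"

print(StackedExpertLinear(8, 1024, 512))
# -> StackedExpertLinear(experts=8, out=1024, in=512)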