@@ -293,7 +293,7 @@ def __init__(
293
293
"act_sym" ,
294
294
"act_dynamic" ,
295
295
"act_data_type" ,
296
- "super_bits" ,
296
+ "super_bits" ,
297
297
"super_group_size"
298
298
]
299
299
@@ -428,8 +428,26 @@ def quantize_and_save(self, output_dir: str = "tmp_autoround", format: str = "au
428
428
logger .error (f"Unsupported format { format_ } , please choose from { supported_formats } " )
429
429
exit (- 1 )
430
430
431
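+ # Export restriction for activation quantization (act_bits <= 8): non-fp8 activations are saved only in "fake" format; fp8 activations (AFP8) are exported only in "auto_round" format.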
432
+ if self .act_bits <= 8 :
433
+ if "fp8" not in self .act_data_type :
434
+ if len (formats ) > 1 or "fake" not in formats :
435
+ logger .warning (
436
+ f"Currently only support to export auto_round format quantized model"
437
+ " with fp8 dtype activation for activation quantization."
438
+ " Change format to fake and save."
439
+ )
440
+ formats = ["fake" ]
441
+ else :
442
+ if len (formats ) > 1 or "auto_round" not in formats :
443
+ logger .warning (
444
+ f"Currently only support to export auto_round format for W{ self .bits } AFP8 model,"
445
+ " change format to auto_round"
446
+ )
447
+ formats = ["auto_round" ]
448
+
431
449
# If multiple formats are specified, enforce inplace=False
432
- if len (format ) > 1 :
450
+ if len (formats ) > 1 :
433
451
inplace = False
434
452
inplace = kwargs .get ("inplace" , inplace )
435
453
kwargs .pop ("inplace" , None )
@@ -1502,6 +1520,24 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
1502
1520
Returns:
1503
1521
object: The compressed model object.
1504
1522
"""
1523
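+ # Export restriction for activation quantization (act_bits <= 8): non-fp8 activations are saved only in "fake" format; fp8 activations (AFP8) are exported only in "auto_round" format.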
1524
+ if self .act_bits <= 8 :
1525
+ if "fp8" not in self .act_data_type :
1526
+ if format != "fake" :
1527
+ logger .warning (
1528
+ f"Currently only support to export auto_round format quantized model"
1529
+ " with fp8 dtype activation for activation quantization."
1530
+ " Change format to fake and save."
1531
+ )
1532
+ format = "fake"
1533
+ else :
1534
+ if format != "auto_round" :
1535
+ logger .warning (
1536
+ f"Currently only support to export auto_round format for W{ self .bits } AFP8 model,"
1537
+ " change format to auto_round"
1538
+ )
1539
+ format = "auto_round"
1540
+
1505
1541
if self .low_cpu_mem_usage :
1506
1542
self .model = self .model .to ('cpu' )
1507
1543
0 commit comments