Skip to content

Commit af56508

Browse files
authored
add restrict for exporting act-quant models (#480)
1 parent d2cbed9 commit af56508

File tree

1 file changed

+38
-2
lines changed

1 file changed

+38
-2
lines changed

auto_round/autoround.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def __init__(
293293
"act_sym",
294294
"act_dynamic",
295295
"act_data_type",
296-
"super_bits",
296+
"super_bits",
297297
"super_group_size"
298298
]
299299

@@ -428,8 +428,26 @@ def quantize_and_save(self, output_dir: str = "tmp_autoround", format: str = "au
428428
logger.error(f"Unsupported format {format_}, please choose from {supported_formats}")
429429
exit(-1)
430430

431+
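# Export is restricted when activations are quantized (act_bits <= 8): FP8 activations -> "auto_round" only; any other activation dtype -> "fake".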
432+
if self.act_bits <= 8:
433+
if "fp8" not in self.act_data_type:
434+
if len(formats) > 1 or "fake" not in formats:
435+
logger.warning(
436+
f"Currently only support to export auto_round format quantized model"
437+
" with fp8 dtype activation for activation quantization."
438+
" Change format to fake and save."
439+
)
440+
formats = ["fake"]
441+
else:
442+
if len(formats) > 1 or "auto_round" not in formats:
443+
logger.warning(
444+
f"Currently only support to export auto_round format for W{self.bits}AFP8 model,"
445+
" change format to auto_round"
446+
)
447+
formats = ["auto_round"]
448+
431449
# If multiple formats are specified, enforce inplace=False
432-
if len(format) > 1:
450+
if len(formats) > 1:
433451
inplace = False
434452
inplace = kwargs.get("inplace", inplace)
435453
kwargs.pop("inplace", None)
@@ -1502,6 +1520,24 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k
15021520
Returns:
15031521
object: The compressed model object.
15041522
"""
1523+
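# Export is restricted when activations are quantized (act_bits <= 8): FP8 activations -> "auto_round" only; any other activation dtype -> "fake".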
1524+
if self.act_bits <= 8:
1525+
if "fp8" not in self.act_data_type:
1526+
if format != "fake":
1527+
logger.warning(
1528+
f"Currently only support to export auto_round format quantized model"
1529+
" with fp8 dtype activation for activation quantization."
1530+
" Change format to fake and save."
1531+
)
1532+
format = "fake"
1533+
else:
1534+
if format != "auto_round":
1535+
logger.warning(
1536+
f"Currently only support to export auto_round format for W{self.bits}AFP8 model,"
1537+
" change format to auto_round"
1538+
)
1539+
format = "auto_round"
1540+
15051541
if self.low_cpu_mem_usage:
15061542
self.model = self.model.to('cpu')
15071543

0 commit comments

Comments
 (0)