
Commit d2cbed9

Temporarily close qxk api for new release (#478)
* Temporarily close qxk api for new release
1 parent 050594f commit d2cbed9

6 files changed: +27 −25 lines changed


README.md

Lines changed: 2 additions & 2 deletions

@@ -157,8 +157,8 @@ autoround = AutoRound(model, tokenizer, bits=bits, group_size=group_size, sym=sy
 ## the best accuracy, 3X slower, low_gpu_mem_usage could save ~20G but ~30% slower
 # autoround = AutoRound(model, tokenizer, nsamples=512, iters=1000, low_gpu_mem_usage=True, bits=bits, group_size=group_size, sym=sym)
 
-## fast and low memory, 2-3X speedup, slight accuracy drop at W4G128
-# autoround = AutoRound(model, tokenizer, nsamples=128, iters=200, seqlen=512, batch_size=4, bits=bits, group_size=group_size, sym=sym )
+## 2-3X speedup, slight accuracy drop at W4G128
+# autoround = AutoRound(model, tokenizer, nsamples=128, iters=50, lr=5e-3, seqlen=512, batch_size=4, bits=bits, group_size=group_size, sym=sym )
 
 autoround.quantize()
 output_dir = "./tmp_autoround"
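For orientation, the tuned recipe above drops straight into the standard AutoRound flow. A minimal sketch, assuming a causal LM and tokenizer are already loaded; bits and group_size follow the W4G128 comment, while the sym value is an illustrative placeholder:

# Minimal sketch of the updated fast recipe; `model` and `tokenizer` are
# assumed to be loaded already (not shown here).
from auto_round import AutoRound

bits, group_size, sym = 4, 128, False  # sym=False is an illustrative choice
autoround = AutoRound(
    model, tokenizer, nsamples=128, iters=50, lr=5e-3, seqlen=512,
    batch_size=4, bits=bits, group_size=group_size, sym=sym,
)
autoround.quantize()
output_dir = "./tmp_autoround"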

auto_round/export/export_to_gguf/config.py

Lines changed: 18 additions & 18 deletions

@@ -18,24 +18,24 @@
 
 GGUF_CONFIG["gguf:q4_1"] = {"bits": 4, "act_bits": 16, "group_size": 32, "asym": True, "data_type": "int"}
 
-GGUF_CONFIG["gguf:q4_k_s"] = {
-    "bits": 4,
-    "act_bits": 16,
-    "super_group_size": 8,
-    "super_bits": 6,
-    "group_size": 32,
-    "asym": True,
-    "data_type": "int_asym_dq"
-}
+# GGUF_CONFIG["gguf:q4_k_s"] = {
+#     "bits": 4,
+#     "act_bits": 16,
+#     "super_group_size": 8,
+#     "super_bits": 6,
+#     "group_size": 32,
+#     "asym": True,
+#     "data_type": "int_asym_dq"
+# }
 
-GGUF_CONFIG["gguf:q2_k_s"] = {
-    "bits": 2,
-    "act_bits": 16,
-    "super_group_size": 16,
-    "super_bits": 4,
-    "group_size": 16,
-    "asym": True,
-    "data_type": "int_asym_dq"
-}
+# GGUF_CONFIG["gguf:q2_k_s"] = {
+#     "bits": 2,
+#     "act_bits": 16,
+#     "super_group_size": 16,
+#     "super_bits": 4,
+#     "group_size": 16,
+#     "asym": True,
+#     "data_type": "int_asym_dq"
+# }
 
 GGUF_CONFIG["gguf:q8_0"] = {"bits": 8, "act_bits": 16, "group_size": 32, "asym": False, "data_type": "int"}

auto_round/export/export_to_gguf/export.py

Lines changed: 3 additions & 2 deletions

@@ -31,8 +31,8 @@
     "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
     "q4_0": gguf.LlamaFileType.MOSTLY_Q4_0,
     "q4_1": gguf.LlamaFileType.MOSTLY_Q4_1,
-    "q4_k_s": gguf.LlamaFileType.MOSTLY_Q4_K_S,
-    "q2_k_s": gguf.LlamaFileType.MOSTLY_Q2_K_S,
+    # "q4_k_s": gguf.LlamaFileType.MOSTLY_Q4_K_S,
+    # "q2_k_s": gguf.LlamaFileType.MOSTLY_Q2_K_S,
     "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
     "auto": gguf.LlamaFileType.GUESSED,
 }
@@ -66,6 +66,7 @@ def save_quantized_as_gguf(output_dir, backend="gguf:q4_0", **kwargs):
     model_name = model_name[-1]
 
     output_type = backend.split(":")[-1]
+    assert output_type.lower() in FTYPE_MAP, f"{output_type} is not supported"
     output_type = FTYPE_MAP.get(output_type.lower())
 
 
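The added assert turns an unsupported backend into an immediate, readable failure instead of FTYPE_MAP.get() silently returning None and breaking further downstream. A conceptual replay of the check; placeholder values stand in for the gguf file types, and in the real function this runs after the model kwargs are unpacked:

# Conceptual replay of the new guard (FTYPE_MAP keys taken from the diff).
FTYPE_MAP = {"q8_0": ..., "q4_0": ..., "q4_1": ..., "auto": ...}  # k-quants removed

backend = "gguf:q4_k_s"
output_type = backend.split(":")[-1]
assert output_type.lower() in FTYPE_MAP, f"{output_type} is not supported"
# -> AssertionError: q4_k_s is not supported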

test/test_autoround_acc.py

Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@ def test_default_acc(self):
         out1 = model_tmp(inp)
 
         assert out0[0].equal(out1[0])
-        self.assertTrue(isclose(float(out0[0][0][0][0]), -0.021002087742090225, rel_tol=1e-04))
+        self.assertTrue(isclose(float(out0[0][0][0][0]), -0.021002087742090225, rel_tol=5e-04))
 
 
 if __name__ == "__main__":
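The relaxed rel_tol widens the accepted band around the reference logit from roughly 0.01% to 0.05% of its magnitude. A quick arithmetic check of what math.isclose now admits:

# With rel_tol=5e-4, a value within ~0.05% of the reference still passes,
# where the old 1e-4 required ~0.01%.
from math import isclose

ref = -0.021002087742090225
assert isclose(ref * (1 + 4e-4), ref, rel_tol=5e-4)      # within the new band
assert not isclose(ref * (1 + 6e-4), ref, rel_tol=5e-4)  # still rejected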

test/test_autoround_export_to_itrex.py

Lines changed: 2 additions & 1 deletion

@@ -75,10 +75,11 @@ def test_autoround_int_quant(self):
         optq_2 = round(model, self.tokenizer, device="cpu", nsamples=20, seqlen=10)
         q_model, layer_config2 = optq_2.quantize()
         compressed_model = pack_model(model=q_model, layer_config=layer_config2, inplace=False)
+        compressed_model = compressed_model.to(torch.float32)
         out4 = q_model(self.lm_input)
         out5 = compressed_model(self.lm_input)
         self.assertTrue(torch.all(out1[0] == out6[0]))
-        self.assertTrue(torch.all(torch.isclose(out4[0], out5[0], atol=1e-3)))
+        self.assertTrue(torch.all(torch.isclose(out4[0], out5[0], atol=5e-3)))
 
     def test_config(self):
         from auto_round.export.export_to_itrex import QuantConfig
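The added cast keeps both forward passes in the same dtype before the element-wise comparison; otherwise a half-precision packed model can miss the tolerance purely from representation rounding. A self-contained illustration of the effect being guarded against (the tensors are stand-ins, not the test's real outputs):

# An fp16 round-trip alone can shift values by ~1e-3, so both models are
# compared in float32 with the loosened atol=5e-3.
import torch

out_ref = torch.randn(2, 4)                            # stand-in for q_model output
out_cmp = out_ref.to(torch.float16).to(torch.float32)  # simulated fp16 packing loss
assert torch.all(torch.isclose(out_ref, out_cmp, atol=5e-3))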

test/test_basic_usage.py

Lines changed: 1 addition & 1 deletion

@@ -31,7 +31,7 @@ def test_auto_round_cmd(self):
         assert False, "cmd line test fail, please have a check"
 
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --model 'facebook/opt-125m' --eval_task_by_task --tasks piqa,openbookqa --bs 32"
+            f"cd .. && {python_path} -m auto_round --model 'facebook/opt-125m' --iter 1 --nsamples 1 --eval_task_by_task --tasks piqa,openbookqa --bs 32"
         )
         if res > 0 or res == -1:
             assert False, "cmd line test fail, please have a check"
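Capping the run with --iter 1 --nsamples 1 turns this command into a cheap smoke test of the CLI path rather than a real quantization run. A standalone sketch of the same invocation, with the repository's working-directory handling stripped:

# Standalone replay of the updated smoke test; the tiny iter/nsamples budget
# keeps the tuning loop trivial so only the CLI wiring is exercised.
import os
import sys

python_path = sys.executable
res = os.system(
    f"{python_path} -m auto_round --model 'facebook/opt-125m' "
    "--iter 1 --nsamples 1 --eval_task_by_task --tasks piqa,openbookqa --bs 32"
)
assert res == 0, "cmd line test fail, please have a check"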
