@@ -9,7 +9,7 @@
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer

-from auto_round import AutoRound
+from auto_round import AutoRound, AutoRoundAdam
 from auto_round.eval.evaluation import simple_evaluate
 from lm_eval.utils import make_table  # pylint: disable=E0401

@@ -24,7 +24,7 @@ def get_accuracy(data):
     return 0.0


-class TestAutoRound(unittest.TestCase):
+class TestMainFunc(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.save_dir = "./saved"
@@ -35,7 +35,6 @@ def tearDownClass(self):
         shutil.rmtree("./saved", ignore_errors=True)
         shutil.rmtree("runs", ignore_errors=True)

-    @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     def test_backend(self):
         model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
@@ -54,7 +53,7 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

-        ##test auto_round format
+        ##test auto_gptq format
         autoround.save_quantized(self.save_dir, format="auto_gptq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
         res = simple_evaluate(model="hf", model_args=model_args,
@@ -65,7 +64,7 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

-        ##test auto_round format
+        ##test auto_awq format
         autoround.save_quantized(self.save_dir, format="auto_awq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
         res = simple_evaluate(model="hf", model_args=model_args,
@@ -113,27 +112,57 @@ def test_fp_layers(self):

     @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     def test_undivided_group_size_tuning(self):
-        model_name = "/models/falcon-7b"
+        model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
         tokenizer = AutoTokenizer.from_pretrained(model_name)

-        autoround = AutoRound(model, tokenizer, bits=4, group_size=128, nsamples=1, iters=1)
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=127, nsamples=2, iters=2)
+        autoround.quantize()
+
+
+    def test_adam(self):
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        autoround = AutoRoundAdam(model, tokenizer, bits=4, group_size=128)
+        autoround.quantize()
+
+        ##test auto_round format
+        autoround.save_quantized(self.save_dir, format="auto_round", inplace=False)
+        model_args = f"pretrained={self.save_dir}"
+        res = simple_evaluate(model="hf", model_args=model_args,
+                              tasks=self.tasks,
+                              batch_size="auto")
+        res = make_table(res)
+        accuracy = get_accuracy(res)
+        assert accuracy > 0.35
+        shutil.rmtree("./saved", ignore_errors=True)
+
+    def test_autoround_asym(self):  ##need to install from source
+        try:
+            from autoround_exllamav2_kernels import gemm_half_q_half, make_q_matrix
+        except ImportError as e:
+            print("skip autoround asym test, as autoround is not installed from source")
+            return
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=False)
         autoround.quantize()
+
+        ##test auto_round format
+        autoround.save_quantized(self.save_dir, format="auto_round", inplace=False)
+        model_args = f"pretrained={self.save_dir}"
+        res = simple_evaluate(model="hf", model_args=model_args,
+                              tasks=self.tasks,
+                              batch_size="auto")
+        res = make_table(res)
+        accuracy = get_accuracy(res)
+        assert accuracy > 0.35
+        shutil.rmtree("./saved", ignore_errors=True)
+
+
+

-    @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
-    def test_vision_generation(self):
-        quantized_model_path = "OPEA/Phi-3.5-vision-instruct-qvision-int4-sym-inc"
-        from auto_round import AutoRoundConfig
-        device = "auto"  ##cpu, hpu, cuda
-        quantization_config = AutoRoundConfig(
-            backend=device
-        )
-        model = AutoModelForCausalLM.from_pretrained(quantized_model_path, trust_remote_code=True,
-                                                     device_map=device, quantization_config=quantization_config)
-        tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
-        text = "There is a girl who likes adventure,"
-        inputs = tokenizer(text, return_tensors="pt").to(model.device)
-        res = tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0])
-        print(res)
-        assert (
-            res == """<s> There is a girl who likes adventure, and she is looking for a partner to go on a treasure hunt. She has found a map that leads to a hidden treasure, but she needs a partner to help her decipher the clues and find the treasure. You""")
+if __name__ == "__main__":
+    unittest.main()