import sys
import unittest
import re
+ import os

sys.path.insert(0, "..")

from PIL import Image
+ from auto_round import AutoRoundConfig
import requests

@@ -38,7 +40,7 @@ def tearDownClass(self):
        # assert (
        # res == """<s> There is a girl who likes adventure, and she is looking for a partner to go on a treasure hunt. She has found a map that leads to a hidden treasure, but she needs a partner to help her decipher the clues and find the treasure. You""")

-     def inference(self, quantized_model_dir):
+     def qwen_inference(self, quantized_model_dir):
        from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
        processor = AutoProcessor.from_pretrained(quantized_model_dir, trust_remote_code=True)
@@ -104,19 +106,68 @@ def test_vlm_tune(self):

        quantized_model_path = self.save_dir
        autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False)
-         self.inference(quantized_model_path)
+         self.qwen_inference(quantized_model_path)
        shutil.rmtree(self.save_dir, ignore_errors=True)
        autoround.save_quantized(quantized_model_path, format='auto_gptq', inplace=False)
-         self.inference(quantized_model_path)
+         self.qwen_inference(quantized_model_path)
        shutil.rmtree(self.save_dir, ignore_errors=True)

+     def phi3_inference(self, quantized_model_dir):
+         from transformers import AutoModelForCausalLM, AutoProcessor
+         quantized_model_path = os.path.join(quantized_model_dir, "Phi-3.5-vision-instruct-w4g128-auto_round")
+         # copy the remote-code *.py files next to the quantized weights so the
+         # saved directory can be loaded with trust_remote_code=True
+         res = os.system(f"cp /models/Phi-3.5-vision-instruct/*.py {quantized_model_path}")
+         model = AutoModelForCausalLM.from_pretrained(
+             quantized_model_path,
+             device_map="auto",
+             trust_remote_code=True,
+             torch_dtype="auto"
+         )
+         processor = AutoProcessor.from_pretrained(quantized_model_path,
+                                                   trust_remote_code=True,
+                                                   num_crops=4
+                                                   )
+
+         image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
+         content = "Describe this image."
+         messages = [
+             {"role": "user",
+              "content": "<|image_1|>\n" + content},
+         ]
+
+         prompt = processor.tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         image_inputs = Image.open(requests.get(image_url, stream=True).raw)
+         inputs = processor(prompt, image_inputs, return_tensors="pt").to(model.device)
+
+         generation_args = {
+             "max_new_tokens": 1000,
+             "temperature": 0.0,
+             "do_sample": False,
+         }
+
+         generate_ids = model.generate(**inputs,
+                                       eos_token_id=processor.tokenizer.eos_token_id,
+                                       **generation_args
+                                       )
+
+         # remove input tokens
+         generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
+         response = processor.batch_decode(generate_ids,
+                                           skip_special_tokens=True,
+                                           clean_up_tokenization_spaces=False)[0]
+
+         print(response)
+
    def test_quant_not_text(self):
        from auto_round import AutoRoundMLLM
-         from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer
+         from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

        ## load the model
-         model_name = "/models/Qwen2-VL-2B-Instruct"
-         model = Qwen2VLForConditionalGeneration.from_pretrained(
+         model_name = "/models/Phi-3.5-vision-instruct"
+         model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
@@ -127,19 +178,21 @@ def test_quant_not_text(self):
            bits=bits, group_size=group_size, sym=sym, iters=1, nsamples=1, quant_nontext_module=True)
        autoround.quantize()

-         quantized_model_path = "./saved"
-         autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False)
-         self.inference(quantized_model_path)
+         quantized_model_path = "./saved/Phi-3.5-vision-instruct-w4g128-auto_round"
+         autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False, safe_serialization=False)
+         self.phi3_inference("./saved")
        shutil.rmtree("./saved", ignore_errors=True)

    def test_quant_not_text_fp_layers(self):
        import os
        python_path = sys.executable
        absolute_path = os.path.abspath(self.save_dir)
        res = os.system(
-             f"cd .. && {python_path} -m auto_round --mllm --model /models/Qwen2-VL-2B-Instruct --fp_layers model.layers.27,visual.blocks.29 --quant_nontext_module --iters 1 --nsamples 1 --output_dir {absolute_path}")
-         self.inference(os.path.join(absolute_path, "Qwen2-VL-2B-Instruct-w4g128-auto_round"))
-         shutil.rmtree(os.path.join(absolute_path, "Qwen2-VL-2B-Instruct-w4g128-auto_round"), ignore_errors=True)
+             f"cd .. && {python_path} -m auto_round --mllm --model /models/Phi-3.5-vision-instruct "
+             f"--fp_layers model.layers.27,model.vision_embed_tokens.img_processor.vision_model.encoder.layers.16 "
+             f"--quant_nontext_module --iters 1 --nsamples 1 --output_dir {absolute_path}")
+         self.phi3_inference(absolute_path)
+         shutil.rmtree(absolute_path, ignore_errors=True)
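
For reference, the quantize-save-reload flow these tests exercise can be condensed into a short standalone script. The sketch below is illustrative and not part of the commit: it assumes a local Phi-3.5-vision checkpoint at /models/Phi-3.5-vision-instruct and mirrors the AutoRoundMLLM arguments visible in the test above (bits/group_size chosen to match the w4g128 naming).

# Illustrative sketch (not part of this commit): quantize Phi-3.5-vision with
# AutoRoundMLLM, including the non-text (vision) modules, then save it.
# The checkpoint path and constructor kwargs mirror the test above.
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer
from auto_round import AutoRoundMLLM

model_name = "/models/Phi-3.5-vision-instruct"  # assumed local checkpoint
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# quant_nontext_module=True also quantizes the vision tower; iters/nsamples
# are kept tiny here, as in the test, to keep the run fast rather than accurate.
autoround = AutoRoundMLLM(model, tokenizer, processor=processor,
                          bits=4, group_size=128, sym=True,
                          iters=1, nsamples=1, quant_nontext_module=True)
autoround.quantize()

# safe_serialization=False mirrors the test above.
autoround.save_quantized("./saved/Phi-3.5-vision-instruct-w4g128-auto_round",
                         format="auto_round", inplace=False,
                         safe_serialization=False)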