
Commit 2ffadfd

fix gpu ut (#376)

1 parent: ecc17be

File tree

3 files changed: +72 −13 lines


auto_round/mllm/autoround_mllm.py

Lines changed: 2 additions & 0 deletions

@@ -19,6 +19,7 @@
 from ..utils import (
     logger,
+    detect_device,
     to_device,
     to_dtype,
     get_multimodal_block_names,
@@ -34,6 +35,7 @@
 def _only_text_test(model, tokenizer, device):
     """Test if the model whether can use text-only datasets."""
     try:
+        device = detect_device(device)
         text = ["only text", "test"]
         tokenizer.padding_side = 'left'
         if tokenizer.pad_token is None:
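The point of this change: `_only_text_test` previously moved inputs to whatever device string the caller passed, so a loose spec such as "auto" could fail on the GPU runner. Resolving the argument through `detect_device` first yields a concrete device before any tensors move. As a rough illustration, a device-resolution helper of this kind typically looks like the sketch below (hypothetical; the real `detect_device` in `auto_round.utils` may support more backends and edge cases):

    import torch

    def detect_device(device=None):
        """Resolve a loose device spec (None, "auto", an int index, or a name)
        to a concrete device string."""
        if device is None or device == "auto":
            # Hypothetical fallback: prefer the first available accelerator.
            return "cuda" if torch.cuda.is_available() else "cpu"
        if isinstance(device, int):
            # Treat a bare integer as a CUDA device index when CUDA is present.
            return f"cuda:{device}" if torch.cuda.is_available() else "cpu"
        return str(device)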

test_cuda/test_support_vlms.py

Lines changed: 5 additions & 1 deletion

@@ -74,6 +74,7 @@ def test_qwen2(self):
             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
         )
         print(output_text[0])
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
 
     def test_phi3(self):
         model_path = "/models/Phi-3.5-vision-instruct/"
@@ -129,8 +130,8 @@ def test_phi3(self):
         response = processor.batch_decode(generate_ids,
                                           skip_special_tokens=True,
                                           clean_up_tokenization_spaces=False)[0]
-
         print(response)
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
 
     def test_llava(self):
         model_path = "/models/llava-v1.5-7b/"
@@ -166,6 +167,7 @@ class DataArgs:
 
         output = model.generate(inputs['input_ids'].to(model.device), images=image_input.unsqueeze(0).half(), max_new_tokens=50)
         print(tokenizer.batch_decode(output))
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
 
     def test_llama(self):
         model_path = "/models/Llama-3.2-11B-Vision-Instruct/"
@@ -204,6 +206,7 @@ def test_llama(self):
 
         output = model.generate(**inputs, max_new_tokens=50)
         print(processor.decode(output[0]))
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
 
     def test_cogvlm(self):
         model_path = "/models/cogvlm2-llama3-chat-19B/"
@@ -257,6 +260,7 @@ def test_cogvlm(self):
         response = tokenizer.decode(outputs[0])
         response = response.split("<|end_of_text|>")[0]
         print(response)
+        shutil.rmtree(quantized_model_path, ignore_errors=True)
 
 if __name__ == "__main__":
     unittest.main()
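Each test above now deletes its quantized checkpoint with `shutil.rmtree(quantized_model_path, ignore_errors=True)` after decoding, so repeated CI runs do not accumulate multi-gigabyte artifacts on the CUDA machine. One caveat with a trailing rmtree is that it is skipped when an earlier assertion or generation step raises; unittest's `addCleanup` runs in either case. A minimal sketch of that alternative (class and directory names are illustrative, not from this commit):

    import shutil
    import unittest

    class TestSupportVLMsCleanup(unittest.TestCase):
        def test_qwen2(self):
            quantized_model_path = "./saved"  # assumed output directory
            # Registered up front, so the directory is removed even if the
            # quantization or generation code below raises.
            self.addCleanup(shutil.rmtree, quantized_model_path, ignore_errors=True)
            # ... quantize, run inference, and assert as in the tests above ...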

test_cuda/test_vlms.py

Lines changed: 65 additions & 12 deletions

@@ -3,10 +3,12 @@
 import sys
 import unittest
 import re
+import os
 
 sys.path.insert(0, "..")
 
 from PIL import Image
+from auto_round import AutoRoundConfig
 import requests
 
 
@@ -38,7 +40,7 @@ def tearDownClass(self):
         # assert (
         #     res == """<s> There is a girl who likes adventure, and she is looking for a partner to go on a treasure hunt. She has found a map that leads to a hidden treasure, but she needs a partner to help her decipher the clues and find the treasure. You""")
 
-    def inference(self, quantized_model_dir):
+    def qwen_inference(self, quantized_model_dir):
         from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
         processor = AutoProcessor.from_pretrained(quantized_model_dir, trust_remote_code=True)
@@ -104,19 +106,68 @@ def test_vlm_tune(self):
 
         quantized_model_path = self.save_dir
         autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False)
-        self.inference(quantized_model_path)
+        self.qwen_inference(quantized_model_path)
         shutil.rmtree(self.save_dir, ignore_errors=True)
         autoround.save_quantized(quantized_model_path, format='auto_gptq', inplace=False)
-        self.inference(quantized_model_path)
+        self.qwen_inference(quantized_model_path)
         shutil.rmtree(self.save_dir, ignore_errors=True)
 
+    def phi3_infernece(self, quantized_model_dir):
+        from transformers import AutoModelForCausalLM, AutoProcessor
+        quantized_model_path = os.path.join(quantized_model_dir, "Phi-3.5-vision-instruct-w4g128-auto_round")
+        res = os.system(f"cp /models/Phi-3.5-vision-instruct/*.py {quantized_model_path}")
+        model = AutoModelForCausalLM.from_pretrained(
+            quantized_model_path,
+            device_map="auto",
+            trust_remote_code=True,
+            torch_dtype="auto"
+        )
+        processor = AutoProcessor.from_pretrained(quantized_model_path,
+                                                  trust_remote_code=True,
+                                                  num_crops=4
+                                                  )
+
+        image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
+        content = "Describe this image."
+        messages = [
+            {"role": "user",
+             "content": "<|image_1|>\n"+content},
+        ]
+
+        prompt = processor.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        image_inputs = Image.open(requests.get(image_url, stream=True).raw)
+        inputs = processor(prompt, image_inputs, return_tensors="pt").to(model.device)
+
+        generation_args = {
+            "max_new_tokens": 1000,
+            "temperature": 0.0,
+            "do_sample": False,
+        }
+
+        generate_ids = model.generate(**inputs,
+                                      eos_token_id=processor.tokenizer.eos_token_id,
+                                      **generation_args
+                                      )
+
+        # remove input tokens
+        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
+        response = processor.batch_decode(generate_ids,
+                                          skip_special_tokens=True,
+                                          clean_up_tokenization_spaces=False)[0]
+
+        print(response)
+
     def test_quant_not_text(self):
         from auto_round import AutoRoundMLLM
-        from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoTokenizer
+        from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer
 
         ## load the model
-        model_name = "/models/Qwen2-VL-2B-Instruct"
-        model = Qwen2VLForConditionalGeneration.from_pretrained(
+        model_name = "/models/Phi-3.5-vision-instruct"
+        model = AutoModelForCausalLM.from_pretrained(
             model_name, trust_remote_code=True)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
@@ -127,19 +178,21 @@ def test_quant_not_text(self):
             bits=bits, group_size=group_size, sym=sym, iters=1, nsamples=1,quant_nontext_module=True)
         autoround.quantize()
 
-        quantized_model_path = "./saved"
-        autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False)
-        self.inference(quantized_model_path)
+        quantized_model_path = "./saved/Phi-3.5-vision-instruct-w4g128-auto_round"
+        autoround.save_quantized(quantized_model_path, format='auto_round', inplace=False, safe_serialization=False)
+        self.phi3_infernece("./saved")
         shutil.rmtree("./saved", ignore_errors=True)
 
     def test_quant_not_text_fp_layers(self):
         import os
         python_path = sys.executable
         absolute_path = os.path.abspath(self.save_dir)
         res = os.system(
-            f"cd .. && {python_path} -m auto_round --mllm --model /models/Qwen2-VL-2B-Instruct --fp_layers model.layers.27,visual.blocks.29 --quant_nontext_module --iters 1 --nsamples 1 --output_dir {absolute_path}")
-        self.inference(os.path.join(absolute_path,"Qwen2-VL-2B-Instruct-w4g128-auto_round"))
-        shutil.rmtree(os.path.join(absolute_path,"Qwen2-VL-2B-Instruct-w4g128-auto_round"), ignore_errors=True)
+            f"cd .. && {python_path} -m auto_round --mllm --model /models/Phi-3.5-vision-instruct "
+            f"--fp_layers model.layers.27,model.vision_embed_tokens.img_processor.vision_model.encoder.layers.16 "
+            f"--quant_nontext_module --iters 1 --nsamples 1 --output_dir {absolute_path}")
+        self.phi3_infernece(absolute_path)
+        shutil.rmtree(absolute_path, ignore_errors=True)
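Two loading details in this file are easy to miss. The new top-level `from auto_round import AutoRoundConfig` import makes the auto_round quantization format known to transformers before any quantized checkpoint is opened. And the `cp /models/Phi-3.5-vision-instruct/*.py {quantized_model_path}` call in `phi3_infernece` copies Phi-3.5-vision's `trust_remote_code` modeling files next to the quantized weights, which the quantized export evidently does not include. A shell-free sketch of that same copy (paths taken from the test above):

    import glob
    import os
    import shutil

    src = "/models/Phi-3.5-vision-instruct"
    dst = "./saved/Phi-3.5-vision-instruct-w4g128-auto_round"
    for py_file in glob.glob(os.path.join(src, "*.py")):
        # Place the remote-code files beside the quantized weights so that
        # from_pretrained(..., trust_remote_code=True) can import them.
        shutil.copy(py_file, dst)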
