intel
diff --git a/test_cuda/test_2_3bits.py b/test_cuda/test_2_3bits.py
Lines changed: 104 additions & 0 deletions
diff --git a/test/test_cuda_before_release.py b/test_cuda/test_main_func.py
rename from test/test_cuda_before_release.py
rename to test_cuda/test_main_func.py
Lines changed: 53 additions & 24 deletions
diff --git a/test_cuda/test_multiple_card_calib.py b/test_cuda/test_multiple_card_calib.py
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,104 @@
+import copy
+import shutil
+import sys
+import unittest
+import re
+
+sys.path.insert(0, "..")
+import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from auto_round import AutoRound
+from auto_round.eval.evaluation import simple_evaluate
+from lm_eval.utils import make_table  # pylint: disable=E0401
+
+
+def get_accuracy(data):
+    """Pull the ``acc`` value out of a results table rendered as text.
+
+    Args:
+        data: Table text; the callers in this file pass the output of
+            ``make_table``.
+
+    Returns:
+        The number captured from the first ``|acc |<arrow> | <value>|``
+        cell sequence, as a float, or ``0.0`` when the pattern is absent.
+    """
+    found = re.search(r'\|acc\s+\|[↑↓]\s+\|\s+([\d.]+)\|', data)
+    return float(found.group(1)) if found is not None else 0.0
+
+
+class TestAutoRound(unittest.TestCase):
+    """Accuracy regression tests for 2-bit and 3-bit AutoRound quantization.
+
+    Every test quantizes ``/models/opt-125m``, exports the result to
+    ``./saved`` and compares the ``lambada_openai`` accuracy reported by
+    ``simple_evaluate`` against a fixed lower bound.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Set the export directory and evaluation task shared by all tests."""
+        cls.save_dir = "./saved"
+        cls.tasks = "lambada_openai"
+
+    @classmethod
+    def tearDownClass(cls):
+        """Remove the export directory and the ``runs`` directory, if present."""
+        shutil.rmtree("./saved", ignore_errors=True)
+        shutil.rmtree("runs", ignore_errors=True)
+
+    def _quantize_opt_125m(self, **quant_kwargs):
+        """Load opt-125m in fp16, quantize it with AutoRound and return the AutoRound object."""
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        autoround = AutoRound(model, tokenizer, **quant_kwargs)
+        autoround.quantize()
+        return autoround
+
+    def _assert_export_accuracy(self, autoround, export_format, min_accuracy):
+        """Export in ``export_format``, evaluate the export and require accuracy above ``min_accuracy``.
+
+        The export directory is removed in ``finally`` so that a failed
+        assertion cannot leave files behind for the next export.
+        """
+        try:
+            autoround.save_quantized(self.save_dir, format=export_format, inplace=False)
+            model_args = f"pretrained={self.save_dir}"
+            res = simple_evaluate(model="hf", model_args=model_args,
+                                  tasks=self.tasks,
+                                  batch_size="auto")
+            accuracy = get_accuracy(make_table(res))
+            # unittest assertion: still active under ``python -O`` and reports both values.
+            self.assertGreater(
+                accuracy, min_accuracy,
+                f"{export_format} export: accuracy {accuracy} is not above {min_accuracy}")
+        finally:
+            shutil.rmtree(self.save_dir, ignore_errors=True)
+
+    def test_3bits_autogptq(self):
+        """3-bit quantization exported in auto_gptq format must exceed 0.30 accuracy."""
+        autoround = self._quantize_opt_125m(bits=3)
+        self._assert_export_accuracy(autoround, "auto_gptq", 0.30)
+
+    def test_norm_bias_tuning(self):
+        """2-bit quantization with norm/bias tuning, auto_round format, must exceed 0.20 accuracy."""
+        autoround = self._quantize_opt_125m(bits=2, group_size=64, enable_norm_bias_tuning=True)
+        # The original author recorded 0.2212 for this configuration.
+        self._assert_export_accuracy(autoround, "auto_round", 0.20)
+
+    def test_2bits_autoround(self):
+        """2-bit quantization must exceed 0.18 accuracy in both auto_round and auto_gptq formats."""
+        autoround = self._quantize_opt_125m(bits=2, group_size=64)
+        # The original author recorded 0.1985 next to both evaluations.
+        self._assert_export_accuracy(autoround, "auto_round", 0.18)
+        self._assert_export_accuracy(autoround, "auto_gptq", 0.18)
+
+# Run this module's tests when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
@@ -9,7 +9,7 @@
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from auto_round import AutoRound
+from auto_round import AutoRound, AutoRoundAdam
 from auto_round.eval.evaluation import simple_evaluate
 from lm_eval.utils import make_table  # pylint: disable=E0401
 
@@ -24,7 +24,7 @@ def get_accuracy(data):
         return 0.0
 
 
-class TestAutoRound(unittest.TestCase):
+class TestMainFunc(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.save_dir = "./saved"
@@ -35,7 +35,6 @@ def tearDownClass(self):
         shutil.rmtree("./saved", ignore_errors=True)
         shutil.rmtree("runs", ignore_errors=True)
 
-    @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     def test_backend(self):
         model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
@@ -54,7 +53,7 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)
 
-        ##test auto_round format
+        ##test auto_gptq format
         autoround.save_quantized(self.save_dir, format="auto_gptq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
         res = simple_evaluate(model="hf", model_args=model_args,
@@ -65,7 +64,7 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)
 
-        ##test auto_round format
+        ##test auto_awq format
         autoround.save_quantized(self.save_dir, format="auto_awq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
         res = simple_evaluate(model="hf", model_args=model_args,
@@ -113,27 +112,57 @@ def test_fp_layers(self):
 
     @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     def test_undivided_group_size_tuning(self):
+        # Smoke test: it only checks that quantize() runs to completion; no
+        # accuracy is asserted.
+        # NOTE(review): group_size=127 presumably does not evenly divide the
+        # layer widths of opt-125m, which would be what "undivided" refers
+        # to -- confirm.
-        model_name = "/models/falcon-7b"
+        model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        autoround = AutoRound(model, tokenizer, bits=4, group_size=128, nsamples=1, iters=1)
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=127, nsamples=2, iters=2)
+        autoround.quantize()
+
+
+    def test_adam(self):
+        """Quantize opt-125m to 4 bits with ``AutoRoundAdam`` and require accuracy above 0.35.
+
+        The quantized model is exported in auto_round format to
+        ``self.save_dir`` and evaluated on ``self.tasks``.
+        """
+        checkpoint = "/models/opt-125m"
+        fp16_model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float16, device_map="auto")
+        tok = AutoTokenizer.from_pretrained(checkpoint)
+        quantizer = AutoRoundAdam(fp16_model, tok, bits=4, group_size=128)
+        quantizer.quantize()
+
+        # Export in auto_round format, then score the saved checkpoint.
+        quantizer.save_quantized(self.save_dir, format="auto_round", inplace=False)
+        eval_output = simple_evaluate(
+            model="hf",
+            model_args=f"pretrained={self.save_dir}",
+            tasks=self.tasks,
+            batch_size="auto",
+        )
+        score = get_accuracy(make_table(eval_output))
+        assert score > 0.35
+        shutil.rmtree("./saved", ignore_errors=True)
+
+    def test_autoround_asym(self):
+        """Require accuracy above 0.35 for asymmetric (``sym=False``) 4-bit quantization.
+
+        The test is skipped when ``autoround_exllamav2_kernels`` cannot be
+        imported; according to the skip message this happens when auto_round
+        was not installed from source.
+        """
+        try:
+            # Import probe only; the imported names are not used below.
+            from autoround_exllamav2_kernels import gemm_half_q_half, make_q_matrix
+        except ImportError:
+            # Raise SkipTest so the runner reports "skipped" rather than a silent pass.
+            self.skipTest("skip autoround asym test, as autoround is not installed from source")
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=False)
         autoround.quantize()
+
+        ##test auto_round format
+        autoround.save_quantized(self.save_dir, format="auto_round", inplace=False)
+        model_args = f"pretrained={self.save_dir}"
+        res = simple_evaluate(model="hf", model_args=model_args,
+                              tasks=self.tasks,
+                              batch_size="auto")
+        res = make_table(res)
+        accuracy = get_accuracy(res)
+        assert accuracy > 0.35
+        shutil.rmtree("./saved", ignore_errors=True)
+
+
+
 
-    @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
-    def test_vision_generation(self):
-        quantized_model_path = "OPEA/Phi-3.5-vision-instruct-qvision-int4-sym-inc"
-        from auto_round import AutoRoundConfig
-        device = "auto"  ##cpu, hpu, cuda
-        quantization_config = AutoRoundConfig(
-            backend=device
-        )
-        model = AutoModelForCausalLM.from_pretrained(quantized_model_path, trust_remote_code=True, 
-                                                     device_map=device, quantization_config=quantization_config)
-        tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
-        text = "There is a girl who likes adventure,"
-        inputs = tokenizer(text, return_tensors="pt").to(model.device)
-        res = tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0])
-        print(res)
-        assert (
-                    res == """<s> There is a girl who likes adventure, and she is looking for a partner to go on a treasure hunt. She has found a map that leads to a hidden treasure, but she needs a partner to help her decipher the clues and find the treasure. You""")
+# Run this module's tests when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,49 @@
+import copy
+import shutil
+import sys
+import unittest
+import re
+
+sys.path.insert(0, "..")
+import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from auto_round import AutoRound
+from auto_round.eval.evaluation import simple_evaluate
+from lm_eval.utils import make_table  # pylint: disable=E0401
+import os
+
+def get_accuracy(data):
+    """Return the ``acc`` value found in a text results table.
+
+    Searches ``data`` for the first ``|acc |<arrow> | <value>|`` cell
+    sequence and converts the captured value to a float. Returns ``0.0``
+    when no such sequence is present.
+    """
+    hit = re.search(r'\|acc\s+\|[↑↓]\s+\|\s+([\d.]+)\|', data)
+    if not hit:
+        return 0.0
+    return float(hit.group(1))
+
+
+class TestAutoRound(unittest.TestCase):
+    """Smoke test that runs the ``auto_round`` command line with two devices."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Set the class-level export directory and task name."""
+        cls.save_dir = "./saved"
+        cls.tasks = "lambada_openai"
+
+    @classmethod
+    def tearDownClass(cls):
+        """Remove the ``./saved`` and ``runs`` directories, if present."""
+        shutil.rmtree("./saved", ignore_errors=True)
+        shutil.rmtree("runs", ignore_errors=True)
+
+    def test_multiple_card_calib(self):
+        """Run ``python -m auto_round`` on devices '0,1' and require a zero exit status."""
+        python_path = sys.executable
+
+        ##test llm script
+        # NOTE(review): "--output_dir None" passes the literal string "None";
+        # confirm whether the CLI treats it as "do not save" or writes to a
+        # directory named None that tearDownClass does not remove.
+        res = os.system(
+            f"cd .. && {python_path} -m auto_round --model /models/Meta-Llama-3.1-8B-Instruct --devices '0,1' --quant_lm_head --disable_eval --iters 1 --nsamples 1 --output_dir None")
+        # Without this check the test passes even when the command fails.
+        self.assertEqual(res, 0, f"auto_round command exited with status {res}")
+
+
+# Run this module's tests when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()
+
+