Commit 42d9ffb

fix: corrected minor code errors from PR
Signed-off-by: omobayode.fagbohungbe <[email protected]>
1 parent 3e7f29c commit 42d9ffb

File tree

3 files changed, +14 -6 lines changed


fms_mo/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 
 # Local
 from fms_mo.prep import qmodel_prep
-from fms_mo.utils.qconfig_utils import qconfig_init, qconfig_load
+from fms_mo.utils.qconfig_utils import qconfig_init
 
 VERSION_FALLBACK = "0.0.0"
 
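Note for downstream callers: after this change, qconfig_load is no longer re-exported from the fms_mo package root, so "from fms_mo import qconfig_load" would stop working. Assuming the function itself still lives in fms_mo.utils.qconfig_utils (this commit only drops the unused re-export and an unused call-site import), it would be imported directly:

    from fms_mo.utils.qconfig_utils import qconfig_load
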
fms_mo/dq.py

Lines changed: 1 addition & 3 deletions
@@ -35,7 +35,7 @@
 import torch
 
 # Local
-from fms_mo import qconfig_init, qmodel_prep, qconfig_load
+from fms_mo import qconfig_init, qmodel_prep
 from fms_mo.fx.utils import model_size_Wb
 from fms_mo.quant.ptq import (
     calibration_llm_1GPU,
@@ -214,7 +214,6 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
     q_file = open('qcfg_llama.json', "r", encoding="utf-8")
     saved_qcfg = json.load(q_file)
     qcfg.update(saved_qcfg)
-    print(qcfg)
 
     qmodel_prep(
         model,
@@ -252,7 +251,6 @@ def run_dq(model_args, data_args, opt_args, fms_mo_args):
         model.save_pretrained(opt_args.output_dir, use_safetensors=True)
         tokenizer.save_pretrained(opt_args.output_dir)
     else:
-        pass
         from accelerate import load_checkpoint_and_dispatch
         model = load_checkpoint_and_dispatch( model, checkpoint=opt_args.output_dir, device_map=None, no_split_module_classes=['Block'])
 

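For context on the else branch in the last hunk: load_checkpoint_and_dispatch comes from Hugging Face accelerate and reloads a saved checkpoint into an already-constructed model, optionally sharding modules across devices. A minimal usage sketch follows; the model directory, AutoModelForCausalLM class, and device_map choice are illustrative assumptions, not code from this commit.

    # Sketch only: paths, model class, and device_map are illustrative assumptions.
    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
    from transformers import AutoConfig, AutoModelForCausalLM

    config = AutoConfig.from_pretrained("./quantized-model")   # directory written by save_pretrained(...)
    with init_empty_weights():
        model = AutoModelForCausalLM.from_config(config)       # build the module tree without allocating weights

    # Load the safetensors checkpoint back into the empty skeleton; no_split_module_classes
    # keeps each transformer block on a single device when a device_map is used.
    model = load_checkpoint_and_dispatch(
        model,
        checkpoint="./quantized-model",
        device_map="auto",                 # the diff passes device_map=None for manual placement
        no_split_module_classes=["Block"],
    )
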
fms_mo/prep.py

Lines changed: 12 additions & 2 deletions
@@ -535,7 +535,18 @@ def has_quantized_module(model):
     """Check if model is already quantized - do not want to quantize twice if so"""
     return any(isinstance(m, quantized_modules) for m in model.modules())
 
-def swap_qbmm(model, qcfg):
+def swap_qbmm(model: nn.Module, qcfg: dict):
+    """Go through all model.named_modules(), try to create an equivalent Qbmm layer to replace each of
+    the existing linear Bmm layers.
+
+    Args:
+        model (nn.Module): input model to be "prepared"
+        qcfg (dict): quant config
+
+    Returns: updated model is returned with the Qbmm added
+
+    """
+
     from fms_mo.modules import QBmm
 
     qcfg["which2patch_contextmanager"] = qcfg["bmm_prep"][
@@ -650,7 +661,6 @@ def qmodel_prep(
     if mode:
 
         if qcfg.get("QBmm"):
-            pass
             swap_qbmm(model,qcfg)
 
         model = q_any_net_5(model, qcfg, verbose = False)

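The new swap_qbmm docstring describes a standard traverse-and-replace pattern over model.named_modules(). As a rough, generic illustration only (fms_mo's actual QBmm construction and bmm bookkeeping are more involved than this), a module-swap loop of that shape can look like the following; MyQuantizedBmm and swap_modules are hypothetical names, not part of fms_mo.

    # Generic sketch of a named_modules() swap loop; MyQuantizedBmm is a hypothetical
    # stand-in, not fms_mo's QBmm, and its constructor arguments are invented.
    import torch
    import torch.nn as nn

    class MyQuantizedBmm(nn.Module):
        """Placeholder for a quantized batched-matmul module."""
        def __init__(self, num_bits: int = 8):
            super().__init__()
            self.num_bits = num_bits

        def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
            # A real implementation would fake-quantize a and b to num_bits first.
            return torch.bmm(a, b)

    def swap_modules(model: nn.Module, target_type: type, build_replacement) -> nn.Module:
        """Replace every child module of target_type with a freshly built replacement."""
        for _, parent in model.named_modules():
            for child_name, child in parent.named_children():
                if isinstance(child, target_type):
                    setattr(parent, child_name, build_replacement(child))
        return model
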