8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -193,10 +193,10 @@ if (LLAMA_BUILD_COMMON)
add_subdirectory(common)
endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
include(CTest)
add_subdirectory(tests)
endif()
# if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
# include(CTest)
# add_subdirectory(tests)
# endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
add_subdirectory(examples)
16 changes: 15 additions & 1 deletion common/arg.cpp
@@ -2274,6 +2274,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.use_mmap = false;
}
).set_env("LLAMA_ARG_NO_MMAP"));
add_opt(common_arg(
{"--use-R3"},
"apply R3 Online Hadamard Transform during computational process mentioned in Spinquant paper",
[](common_params & params) {
params.online_R3 = true;
}
));
add_opt(common_arg(
{"--use-R4"},
"apply R4 Online Hadamard Transform during computational process mentioned in Spinquant paper",
[](common_params & params) {
params.online_R4 = true;
}
));
add_opt(common_arg(
{"--numa"}, "TYPE",
"attempt optimizations that help on some NUMA systems\n"
@@ -3225,7 +3239,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.vocoder.use_guide_tokens = true;
}
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
add_opt(common_arg(
{"--tts-speaker-file"}, "FNAME",
"speaker file path for audio generation",
[](common_params & params, const std::string & value) {
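For context on what these flags enable: an online Hadamard rotation of this kind amounts to multiplying an activation vector by an orthonormal Hadamard matrix on the fly, before quantization, to flatten outliers. The snippet below is only an illustrative fast Walsh–Hadamard transform in plain C++; it is not the ggml implementation this PR plugs into, and the 1/sqrt(n) normalization and power-of-two size requirement are assumptions of this sketch.

```cpp
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Minimal in-place fast Walsh-Hadamard transform (illustrative only).
// Assumes the length is a power of two; the 1/sqrt(n) scaling makes the
// rotation orthonormal, so it can be undone by applying it again.
static void fwht(std::vector<float> & x) {
    const size_t n = x.size();
    assert(n > 0 && (n & (n - 1)) == 0);
    for (size_t len = 1; len < n; len <<= 1) {
        for (size_t i = 0; i < n; i += len << 1) {
            for (size_t j = i; j < i + len; ++j) {
                const float a = x[j];
                const float b = x[j + len];
                x[j]       = a + b;
                x[j + len] = a - b;
            }
        }
    }
    const float scale = 1.0f / std::sqrt((float) n);
    for (float & v : x) {
        v *= scale;
    }
}

int main() {
    std::vector<float> activation = {1.0f, -2.0f, 0.5f, 3.0f};
    fwht(activation); // rotate the activation before quantization
    for (float v : activation) {
        printf("%f\n", v);
    }
    return 0;
}
```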
3 changes: 3 additions & 0 deletions common/common.cpp
@@ -1101,6 +1101,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
mparams.use_mmap = params.use_mmap;
mparams.use_mlock = params.use_mlock;
mparams.check_tensors = params.check_tensors;
mparams.online_R4 = params.online_R4;

if (params.kv_overrides.empty()) {
mparams.kv_overrides = NULL;
@@ -1152,6 +1153,8 @@ struct llama_context_params common_context_params_to_llama(const common_params &
cparams.op_offload = !params.no_op_offload;
cparams.swa_full = params.swa_full;

cparams.online_R3 = params.online_R3; // also carry the online R3 transform flag into llama_context_params

cparams.type_k = params.cache_type_k;
cparams.type_v = params.cache_type_v;

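For callers that construct parameters programmatically instead of via the CLI, the flow suggested by this hunk is to set the booleans on `common_params` before converting them. A minimal sketch, assuming the fields added in this PR exist; `enable_online_rotations` is a hypothetical helper, not part of the PR:

```cpp
#include "common.h"

// Sketch only: mirrors what --use-R3 / --use-R4 do on the command line.
void enable_online_rotations(common_params & params) {
    params.online_R3 = true; // same effect as --use-R3
    params.online_R4 = true; // same effect as --use-R4

    // The converters then carry the flags into the llama API structs,
    // as shown in the common.cpp hunk above.
    llama_model_params   mparams = common_model_params_to_llama(params);
    llama_context_params cparams = common_context_params_to_llama(params);
    (void) mparams;
    (void) cparams;
}
```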
3 changes: 3 additions & 0 deletions common/common.h
@@ -339,6 +339,9 @@ struct common_params {

bool single_turn = false; // single turn chat conversation

bool online_R3 = false; // whether to apply the online R3 Hadamard transform
bool online_R4 = false; // whether to apply the online R4 Hadamard transform

ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
