
Commit 686a8f7

Merge branch 'develop' of github.com:PaddlePaddle/PaddleNLP into Pr_adapt_flex_checkpoint

2 parents: 6091951 + efd7d26

File tree: 44 files changed, 1775 additions and 290 deletions

.github/actions/check-bypass/action.yml

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+name: "Check bypass"
+description: "A custom action to encapsulate PFCCLab/ci-bypass"
+inputs:
+  github-token:
+    description: "GitHub token"
+    required: true
+  workflow-name:
+    description: "Workflow name"
+    required: true
+outputs:
+  can-skip:
+    description: "Whether the workflow can be skipped."
+    value: ${{ steps.check-bypass.outputs.can-skip }}
+
+runs:
+  using: "composite"
+  steps:
+    - id: check-bypass
+      name: Check Bypass
+      env:
+        CI_TEAM_MEMBERS: '["tianshuo78520a", "swgu98", "risemeup1", "XieYunshen", "luotao1", "From00"]'
+      uses: PFCCLab/ci-bypass@v1
+      with:
+        github-token: ${{ inputs.github-token }}
+        non-pull-request-event-strategy: 'never-skipped'
+        type: 'composite'
+        composite-rule: |
+          {
+            "any": [
+              {
+                "type": "labeled",
+                "label": ["skip-ci: ${{ inputs.workflow-name }}", "skip-ci: all"],
+                "username": ${{ env.CI_TEAM_MEMBERS }}
+              },
+              {
+                "type": "commented",
+                "comment-pattern": [".*/skip-ci ${{ inputs.workflow-name }}.*", ".*/skip-ci all.*"],
+                "username": ${{ env.CI_TEAM_MEMBERS }}
+              }
+            ]
+          }
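The `comment-pattern` entries in the rule above are regular expressions matched against PR comment text. A minimal sketch of how such a comment would match (the workflow name "lint" and the use of `re.fullmatch` are illustrative assumptions; ci-bypass's exact matching semantics are not shown in this diff):

```python
import re

# Patterns as they would render for a hypothetical workflow-name "lint"
patterns = [r".*/skip-ci lint.*", r".*/skip-ci all.*"]

def matches_skip(comment: str) -> bool:
    # True if any configured pattern matches the whole comment text
    return any(re.fullmatch(p, comment) for p in patterns)

print(matches_skip("/skip-ci lint"))   # a direct skip request -> True
print(matches_skip("please review"))   # unrelated comment -> False
```

Because the patterns are wrapped in `.*`, a skip command embedded in a longer comment also matches.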

.github/workflows/approval.yml

Lines changed: 8 additions & 0 deletions
@@ -33,6 +33,14 @@ jobs:
           git checkout -b test_pr upstream/${BRANCH}
           git merge --no-edit origin_pr
           git log --pretty=oneline -10
+
+      - name: Check bypass
+        id: check-bypass
+        uses: ./.github/actions/check-bypass
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          workflow-name: approval
+
       - name: Display Required Approvers
         if: steps.check-bypass.outputs.can-skip != 'true'
         run: |
.github/workflows/check-bypass.yml

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+on:
+  workflow_call:
+    inputs:
+      workflow-name:
+        required: true
+        type: string
+    secrets:
+      github-token:
+        required: true
+    outputs:
+      can-skip:
+        description: "Whether the workflow can be skipped."
+        value: ${{ jobs.check-bypass.outputs.can-skip }}
+
+jobs:
+  check-bypass:
+    name: Check bypass
+    runs-on:
+      group: APPROVAL
+    permissions:
+      contents: read
+    env:
+      CI_TEAM_MEMBERS: '["tianshuo78520a", "swgu98", "risemeup1", "XieYunshen", "luotao1", "From00"]'
+    outputs:
+      can-skip: ${{ steps.check-bypass.outputs.can-skip }}
+    steps:
+      - name: Cleanup
+        run: |
+          rm -rf * .[^.]*
+
+      - id: check-bypass
+        name: Check Bypass
+        uses: PFCCLab/ci-bypass@v1
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          non-pull-request-event-strategy: 'never-skipped'
+          type: 'composite'
+          composite-rule: |
+            {
+              "any": [
+                {
+                  "type": "labeled",
+                  "label": ["skip-ci: ${{ inputs.workflow-name }}", "skip-ci: all"],
+                  "username": ${{ env.CI_TEAM_MEMBERS }}
+                },
+                {
+                  "type": "commented",
+                  "comment-pattern": [".*/skip-ci ${{ inputs.workflow-name }}.*", ".*/skip-ci all.*"],
+                  "username": ${{ env.CI_TEAM_MEMBERS }}
+                }
+              ]
+            }

.github/workflows/distribute-a100.yml

Lines changed: 10 additions & 0 deletions
@@ -37,8 +37,18 @@ defaults:
     shell: bash

 jobs:
+  check-bypass:
+    name: Check bypass
+    uses: ./.github/workflows/check-bypass.yml
+    with:
+      workflow-name: 'distribute-a100'
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+
   distribute-a100-ci:
     name: distribute-a100-ci
+    needs: check-bypass
+    if: ${{ needs.check-bypass.outputs.can-skip != 'true' }}
     runs-on:
       group: Distribute
     steps:

.github/workflows/distribute-v100.yml

Lines changed: 10 additions & 0 deletions
@@ -37,8 +37,18 @@ defaults:
     shell: bash

 jobs:
+  check-bypass:
+    name: Check bypass
+    uses: ./.github/workflows/check-bypass.yml
+    with:
+      workflow-name: 'distribute-v100'
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+
   distribute-v100-ci:
     name: distribute-v100-ci
+    needs: check-bypass
+    if: ${{ needs.check-bypass.outputs.can-skip != 'true' }}
     runs-on:
       group: Auto-Parallel
     steps:

.github/workflows/lint.yml

Lines changed: 10 additions & 0 deletions
@@ -13,8 +13,18 @@ env:
   TASK: PaddleNLP-CI-Lint-${{ github.event.pull_request.number }}

 jobs:
+  check-bypass:
+    name: Check bypass
+    uses: ./.github/workflows/check-bypass.yml
+    with:
+      workflow-name: 'lint'
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+
   Lint:
     name: Lint
+    needs: check-bypass
+    if: ${{ needs.check-bypass.outputs.can-skip != 'true' }}
     runs-on: [self-hosted, ernie-cpu]
     steps:
       - name: Run Container

llm/docs/finetune.md

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" run_finetune.
 3. The backbone model can be quantized to low-bit precision by setting `weight_quantize_algo`, e.g. 'weight_only_int4', 'weight_only_int8', 'nf4', or 'fp4'. See the fine-tuning parameter reference for details.
 4. Set `use_flash_attention` to True to enable FlashAttention. With FlashAttention enabled, set `flash_mask` to True to enable FlashMask.
 5. The LoRA API supports 4D parallel strategies; the parallel training strategy can be adjusted via `tensor_parallel_degree`, `pipeline_parallel_degree`, `sharding`, and `sharding_parallel_degree`, scaling up to **LoRA fine-tuning of hundred-billion-parameter models on a single machine**.
-6. Parameters such as `rslora`, `lora_plus_scale`, `pissa`, `lora_use_mixer`, and `use_mora` can be configured to enable algorithms such as rsLoRA, LoRA+, PiSSA, MosLoRA (tensor model parallelism not yet supported), and MoRA (tensor model parallelism not yet supported).
+6. Parameters such as `rslora`, `lora_plus_scale`, `pissa`, `lora_use_mixer`, `mixer_num`, and `use_mora` can be configured to enable algorithms such as rsLoRA, LoRA+, PiSSA, MosLoRA (tensor model parallelism not yet supported), LinChain (tensor model parallelism not yet supported), and MoRA (tensor model parallelism not yet supported).

 For easier downstream **compression** and **static-graph inference**, we provide a LoRA parameter merge script that merges the LoRA parameters into the backbone model and saves the corresponding weights.
 ```
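The merge the doc describes is conceptually `W' = W + scaling * (A @ B)`: the low-rank update is folded into the backbone weight once, so inference needs no extra matmul. A minimal numpy sketch under assumed shapes and scaling (illustrative values only; PaddleNLP's actual layer also handles dtype casting, quantization, and the mixer/MoRA variants):

```python
import numpy as np

rng = np.random.default_rng(0)
in_dim, out_dim, r = 8, 6, 2  # illustrative dimensions, rank r much smaller than dims
W = rng.standard_normal((in_dim, out_dim))       # backbone weight
lora_A = rng.standard_normal((in_dim, r))        # down-projection
lora_B = rng.standard_normal((r, out_dim))       # up-projection
scaling = 2.0                                    # stands in for lora_alpha / r

delta_weight = lora_A @ lora_B * scaling
merged = W + delta_weight                        # merged weight, same shape as W
print(merged.shape)  # → (8, 6)
```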

llm/run_finetune.py

Lines changed: 6 additions & 0 deletions
@@ -580,6 +580,12 @@ def create_peft_model(model_args, reft_args, training_args, dtype, model_config,
             use_quick_lora=model_args.use_quick_lora,
             lora_use_mixer=model_args.lora_use_mixer,
             use_mora=model_args.use_mora,
+<<<<<<< HEAD
+            nola=model_args.nola,
+            nola_basis_num=model_args.nola_basis_num,
+=======
+            mixer_num=model_args.mixer_num,
+>>>>>>> upstream/develop
             lorapro=model_args.lorapro,
         )
         if model_args.lorapro:

llm/tools/merge_lora_params.py

Lines changed: 23 additions & 6 deletions
@@ -78,51 +78,68 @@ def weight_process(name, quant_config, lora_config, state_dict, device):
         raise ValueError(f"quant_config.weight_quantize_algo {quant_config.weight_quantize_algo} is not supported.")


+def get_mixer(mixer, mixer_num, index=0):
+    if index == mixer_num - 1:
+        return mixer[index]
+    else:
+        return mixer[index] @ get_mixer(mixer, mixer_num, index + 1)
+
+
 def lora_process(name, layer, lora_config, state_dict, device, lora_state_dict=None):
+
     target_device = device if device == "cpu" else device + ":0"

     if (name + ".weight") not in state_dict.keys():
         return

     weight = state_dict.pop(name + ".weight")
     lora_use_mixer = lora_config.lora_use_mixer
+
+    mixer_num = lora_config.mixer_num
+    mixer = {}
     use_mora = lora_config.use_mora
+
     if lora_state_dict is None:
         lora_A = state_dict.pop(name + ".lora_A")
         if not use_mora:
             lora_B = state_dict.pop(name + ".lora_B")
         if lora_use_mixer:
-            lora_AB = state_dict.pop(name + ".lora_AB")
+            for i in range(mixer_num):
+                mixer[i] = state_dict.pop(name + ".lora_mixer_" + str(i))
     else:
         lora_A = lora_state_dict.pop(name + ".lora_A")
         if not use_mora:
             lora_B = lora_state_dict.pop(name + ".lora_B")
         if lora_use_mixer:
-            lora_AB = lora_state_dict.pop(name + ".lora_AB")
+            for i in range(mixer_num):
+                mixer[i] = state_dict.pop(name + ".lora_mixer_" + str(i))
     if device != "cpu":
         weight = weight.to(target_device)
         lora_A = lora_A.to(target_device)
         if not use_mora:
             lora_B = lora_B.to(target_device)
         if lora_use_mixer:
-            lora_AB = lora_AB.to(target_device)
+            for key in mixer.keys():
+                mixer[key] = mixer[key].to(target_device)

     if device == "cpu" and weight.dtype.name == "BF16":
         weight = weight.astype("float32")
         lora_A = lora_A.astype("float32")
         if not use_mora:
             lora_B = lora_B.astype("float32")
+
         if lora_use_mixer:
-            lora_AB = lora_AB.astype(lora_config.dtype)
-            delta_weight = layer.get_delta_weight(lora_A, lora_B, lora_AB)
+            for key in mixer.keys():
+                mixer[key] = mixer[key].astype(lora_config.dtype)
+            delta_weight = layer.get_delta_weight(lora_A, lora_B, get_mixer(mixer, mixer_num))
         elif use_mora:
             delta_weight = layer.get_delta_weight(lora_A)
         else:
             delta_weight = layer.get_delta_weight(lora_A, lora_B)
         out = (weight + delta_weight).astype(lora_config.dtype)
     else:
         if lora_use_mixer:
-            delta_weight = layer.get_delta_weight(lora_A, lora_B, lora_AB)
+            delta_weight = layer.get_delta_weight(lora_A, lora_B, get_mixer(mixer, mixer_num))
         elif use_mora:
             delta_weight = layer.get_delta_weight(lora_A)
         else:
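The new `get_mixer` helper collapses the per-index mixer matrices into one matrix via recursive matrix multiplication, equivalent to the left-to-right chain product `mixer[0] @ mixer[1] @ ... @ mixer[mixer_num - 1]`. A standalone numpy sketch of the same recursion (the 4×4 shapes are illustrative):

```python
import numpy as np

def get_mixer(mixer, mixer_num, index=0):
    # Recursively compute mixer[index] @ mixer[index + 1] @ ... @ mixer[mixer_num - 1]
    if index == mixer_num - 1:
        return mixer[index]
    return mixer[index] @ get_mixer(mixer, mixer_num, index + 1)

rng = np.random.default_rng(0)
mixers = {i: rng.standard_normal((4, 4)) for i in range(3)}
chained = get_mixer(mixers, 3)
direct = mixers[0] @ mixers[1] @ mixers[2]
print(np.allclose(chained, direct))  # → True: the recursion equals the plain chain product
```

Collapsing the chain once at merge time means the merged delta weight needs no per-step mixer bookkeeping afterwards.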

ops/csrc/setup.py

Lines changed: 6 additions & 23 deletions
@@ -53,20 +53,6 @@ def run_single(func):
     p.join()


-def run_multi(func_list):
-    processes = []
-    for func in func_list:
-        processes.append(multiprocessing.Process(target=func))
-        processes.append(multiprocessing.Process(target=func))
-        processes.append(multiprocessing.Process(target=func))
-
-    for p in processes:
-        p.start()
-
-    for p in processes:
-        p.join()
-
-
 cc_flag = get_gencode_flags(compiled_all=False)
 cc = get_sm_version()

@@ -251,17 +237,14 @@ def setup_paddle_bwd_ops():
         ext_modules=CUDAExtension(
             include_dirs=paddle_includes,
             sources=sources,
+            extra_compile_args={}
         ),
     )


 if __name__ == "__main__":
-    run_multi(
-        [
-            setup_fast_ln,
-            setup_fused_ln,
-            setup_causal_conv1d,
-            setup_selective_scan,
-            setup_paddle_bwd_ops,
-        ],
-    )
+    setup_fast_ln()
+    setup_fused_ln()
+    setup_causal_conv1d()
+    setup_selective_scan()
+    setup_paddle_bwd_ops()
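This change drops the parallel `run_multi` helper and builds the extensions sequentially; the retained `run_single` (visible in the hunk header) still isolates each build in a child process. A sketch of that isolation pattern (the `fake_build` function and returning the exit code are illustrative additions, not part of the real setup.py):

```python
import multiprocessing

def run_single(func):
    # Run one build step in its own process so each extension gets a
    # clean interpreter state; mirrors the helper kept in setup.py.
    p = multiprocessing.Process(target=func)
    p.start()
    p.join()
    return p.exitcode  # illustrative: exposes success/failure to the caller

def fake_build():
    # Hypothetical stand-in for a setup_* build step.
    print("compiling extension")

if __name__ == "__main__":
    print(run_single(fake_build))  # → 0 on success
```

Running steps one at a time trades build speed for deterministic ordering and simpler failure attribution.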
