22 changes: 11 additions & 11 deletions examples/README.md
@@ -19,12 +19,12 @@ export DOWNLOAD_SOURCE=aistudio

### Notes on Using Paddle-Format Weights

-To use **Paddle**-format weights, manually add the following parameters to the config file (e.g. `sft_full.json`, `sft_lora.json`) to avoid conflicts with the **HuggingFace** format:
+To use **Paddle**-format weights, manually add the following parameters to the config file (e.g. `sft_full.yaml`, `sft_lora.yaml`) to avoid conflicts with the **HuggingFace** format:

-```json
-"model_name_or_path": "your_model_name",
-"convert_from_hf": false,
-"save_to_hf": false,
+```yaml
+model_name_or_path: your_model_name_or_path
+convert_from_hf: false
+save_to_hf: false
```
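The flags above are flat `key: value` YAML. As a minimal illustration of how they land in a config dict, here is a stdlib-only sketch with a hypothetical `parse_flat_yaml` helper; it is not part of the framework, which presumably uses a full YAML parser:

```python
def parse_flat_yaml(text: str) -> dict:
    """Parse flat `key: value` lines into a dict (hypothetical helper, stdlib only)."""
    config = {}
    for line in text.splitlines():
        line = line.split("#", 1)[0].strip()  # drop comments and whitespace
        if not line or ":" not in line:
            continue
        key, _, value = line.partition(":")
        value = value.strip()
        # Coerce the YAML booleans used by these flags.
        if value in ("true", "false"):
            value = (value == "true")
        config[key.strip()] = value
    return config


cfg = parse_flat_yaml("""
model_name_or_path: your_model_name_or_path
convert_from_hf: false
save_to_hf: false
""")
```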


@@ -55,19 +55,19 @@ tar -xvf alpaca_demo.gz

Single GPU
```bash
-python -u run_finetune.py ./config/sft_full.json
+python -u run_finetune.py ./config/sft_full.yaml
```

Multi-GPU
```bash
-python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/sft_full.json
+python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/sft_full.yaml
```

### 1.3 LoRA SFT

Reference command for launching LoRA SFT:
```bash
-python -u run_finetune.py ./config/sft_lora.json
+python -u run_finetune.py ./config/sft_lora.yaml
```


@@ -109,19 +109,19 @@ tar -zxvf ultrafeedback_binarized.tar.gz

Single GPU
```bash
-python -u ./alignment/dpo/run_dpo.py ./config/dpo_full.json
+python -u ./alignment/dpo/run_dpo.py ./config/dpo_full.yaml
```

Multi-GPU
```bash
-python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/dpo_full.json
+python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/dpo_full.yaml
```

### 2.3 LoRA DPO

Reference command for launching LoRA DPO:
```bash
-python -u ./alignment/dpo/run_dpo.py ./config/dpo_lora.json
+python -u ./alignment/dpo/run_dpo.py ./config/dpo_lora.yaml
```


2 changes: 2 additions & 0 deletions examples/alignment/dpo/run_dpo.py
@@ -76,6 +76,8 @@ def main():
     parser = PdArgumentParser((DPOModelArgument, DPODataArgument, DPOTrainingArguments, DPOConfig))
     if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
         model_args, data_args, training_args, dpo_config = parser.parse_json_file_and_cmd_lines()
+    elif len(sys.argv) >= 2 and sys.argv[1].endswith(".yaml"):
+        model_args, data_args, training_args, dpo_config = parser.parse_yaml_file_and_cmd_lines()
     else:
         model_args, data_args, training_args, dpo_config = parser.parse_args_into_dataclasses()
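The suffix-dispatch pattern in this hunk can be sketched in isolation. `TrainingArguments` below is a hypothetical stand-in for the real argument dataclasses, and the YAML branch presumably requires PyYAML, as the framework's does:

```python
import dataclasses
import json
import tempfile


@dataclasses.dataclass
class TrainingArguments:
    # Hypothetical stand-in for the real DPOTrainingArguments fields.
    learning_rate: float = 1.0e-6
    output_dir: str = "./checkpoints"


def parse_config(argv):
    """Dispatch on the config file's suffix, mirroring the hunk above."""
    if len(argv) >= 2 and argv[1].endswith(".json"):
        with open(argv[1]) as f:
            raw = json.load(f)
    elif len(argv) >= 2 and argv[1].endswith(".yaml"):
        import yaml  # assumed dependency: PyYAML
        with open(argv[1]) as f:
            raw = yaml.safe_load(f)
    else:
        raw = {}  # the real parser falls back to command-line flags here
    known = {f.name for f in dataclasses.fields(TrainingArguments)}
    return TrainingArguments(**{k: v for k, v in raw.items() if k in known})


# Demo: a JSON config still parses even though the examples moved to YAML.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump({"learning_rate": 2e-5, "unknown_key": 1}, tmp)
args = parse_config(["run_dpo.py", tmp.name])
```

Keeping the `.json` branch alongside the new `.yaml` one preserves backward compatibility for existing configs.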

37 changes: 0 additions & 37 deletions examples/config/dpo_full.json

This file was deleted.

49 changes: 49 additions & 0 deletions examples/config/dpo_full.yaml
@@ -0,0 +1,49 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_total_limit: 1
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 1
pipeline_parallel_degree: 1
sharding: stage2
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
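Several of these knobs interact. With the 8-GPU launch used in the README and no tensor or pipeline parallelism, the effective global batch size follows the standard data-parallel formula (my own arithmetic, not stated in the config):

```python
# Values taken from the config above.
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
tensor_parallel_degree = 1
pipeline_parallel_degree = 1
num_devices = 8  # the multi-GPU example launches on devices 0-7

# Data-parallel degree is whatever remains after tensor/pipeline sharding.
data_parallel_degree = num_devices // (tensor_parallel_degree * pipeline_parallel_degree)
global_batch_size = (per_device_train_batch_size
                     * gradient_accumulation_steps
                     * data_parallel_degree)
print(global_batch_size)  # 32
```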
39 changes: 0 additions & 39 deletions examples/config/dpo_lora.json

This file was deleted.

51 changes: 51 additions & 0 deletions examples/config/dpo_lora.yaml
@@ -0,0 +1,51 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask
lora: true
lora_rank: 8

### finetuning
# base
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_total_limit: 1
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_lora_ckpts
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-5

# performance
tensor_parallel_degree: 1
pipeline_parallel_degree: 1
sharding: stage2
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
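For a sense of scale, `lora_rank: 8` keeps the trainable-parameter count small. A back-of-envelope sketch; the 1024 hidden size for Qwen3-0.6B is an assumption for illustration:

```python
def lora_extra_params(d_in: int, d_out: int, rank: int) -> int:
    """Trainable parameters LoRA adds for one projection: the update is
    factorized as B @ A with A of shape (rank, d_in) and B of shape (d_out, rank)."""
    return rank * d_in + d_out * rank


# One 1024x1024 projection at rank 8 (hidden size is an assumption).
print(lora_extra_params(1024, 1024, 8))  # 16384
```

Compared with the roughly one million parameters of a full 1024x1024 matrix, that is about a 64x reduction per adapted projection.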
38 changes: 0 additions & 38 deletions examples/config/sft_full.json

This file was deleted.

49 changes: 49 additions & 0 deletions examples/config/sft_full.yaml
@@ -0,0 +1,49 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/sft/train.json
train_dataset_prob: "1.0"
eval_dataset_path: ./data/sft/dev.json
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_total_limit: 1
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-5

# performance
tensor_parallel_degree: 1
pipeline_parallel_degree: 1
sharding: stage2
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
39 changes: 0 additions & 39 deletions examples/config/sft_lora.json

This file was deleted.
