Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/fleet-model-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,21 @@ jobs:
fi
'

- name: GLM4.5 dpo_lora
if: (success() || failure()) && steps.formers_install.conclusion == 'success'
run: |
docker exec -t ${{ env.container_name }} /bin/bash -ce '
source /root/proxy
timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_dpo_lora.sh
glm45_exit_code=$?
if [[ "$glm45_exit_code" != "0" ]]; then
echo -e "::error:: \033[31mIntegration test failed: GLM4.5 dpo lora.\033[0m"
exit 1
else
echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo lora.\033[0m"
fi
'

- name: GLM4.5 pre-train (FP8)
if: (success() || failure()) && steps.formers_install.conclusion == 'success'
run: |
Expand Down
22 changes: 16 additions & 6 deletions paddleformers/cli/train/dpo/dpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from paddle.distributed import fleet

from paddleformers.nn.criterion import CriterionLayer
from paddleformers.peft import LoRAModel
from paddleformers.peft.lora.lora_model import AVAILABLE_LAYERS
from paddleformers.trainer import Trainer
from paddleformers.transformers.model_utils import unwrap_model
Expand Down Expand Up @@ -227,8 +228,14 @@ def _wrap_ref_model(self, model):

def _wrap_model(self, model, training=True):
"""Wrap model."""
if is_paddlefleet_available() and isinstance(model, PaddleFleetPipelineLayer):
model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
if is_paddlefleet_available() and (
isinstance(model, PaddleFleetPipelineLayer)
or (isinstance(model, LoRAModel) and isinstance(model.model, PaddleFleetPipelineLayer))
):
if isinstance(model, LoRAModel):
model.model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
else:
model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
model = super()._wrap_model(model, training)
return model

Expand Down Expand Up @@ -351,7 +358,8 @@ def fleet_prediction_pipeline_step(
with self.autocast_smart_context_manager():
model.eval_batch(data=[inputs, labels], compute_loss=True)
self.enable_lora(model)
model._p2p_helper.clear_meta_cache()
if hasattr(model, "_p2p_helper"):
model._p2p_helper.clear_meta_cache()
model.train()
else:
ref_model = self.ref_model_wrapped
Expand All @@ -367,8 +375,9 @@ def fleet_prediction_pipeline_step(
else:
reference_chosen_logps = [paddle.zeros([1]) for _ in range(model.accumulate_steps)]
reference_rejected_logps = [paddle.zeros([1]) for _ in range(model.accumulate_steps)]
if ref_model.is_pipeline_last_stage(ignore_virtual=ref_model._layers._num_virtual_pipeline_stages > 1):
if is_paddlefleet_available() and isinstance(ref_model, PaddleFleetParallelBase):

if model.is_pipeline_last_stage(ignore_virtual=model._layers._num_virtual_pipeline_stages > 1):
if is_paddlefleet_available() and isinstance(model, PaddleFleetParallelBase):
labels = fleet_merge_dpo_labels(labels, (reference_chosen_logps, reference_rejected_logps))
else:
labels = labels[:-2] + (reference_chosen_logps, reference_rejected_logps)
Expand Down Expand Up @@ -567,7 +576,8 @@ def training_pipeline_step(self, model, inputs):
with self.autocast_smart_context_manager():
model.eval_batch(data=[inputs, labels], compute_loss=True)
self.enable_lora(model)
model._p2p_helper.clear_meta_cache()
if hasattr(model, "_p2p_helper"):
model._p2p_helper.clear_meta_cache()
model.train()
else:
ref_model = self.ref_model_wrapped
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/peft/lora/lora_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,8 @@ def save_pretrained(self, save_directory: str, merge_tensor_parallel: bool = Fal
if n_replace > 0:
aoa_config["aoa_statements"].append(f"{key} -> {key_new}")
break
if hasattr(self.model, "_gen_lora_inv_aoa_config"):
aoa_config["aoa_statements"] += self.model._gen_lora_inv_aoa_config(self.model.config)

HFFormatFullParamSaver(model_to_save, aoa_config).save_checkpoint(
save_directory, max_shard_size, save_peft=True
Expand Down
7 changes: 6 additions & 1 deletion paddleformers/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,7 @@ def bf16_filtered_sharded_state_dict(sharded_state_dict):
model_sharded_state_dict = bf16_filtered_sharded_state_dict(model_sharded_state_dict)
# NOTE(xingmingyyj) When saving model states only in float32 format, we assume that users
# will not use AOA to change the mapping relationships among these float32 weights.

dist.load_state_dict(
model_sharded_state_dict,
model_states_path,
Expand Down Expand Up @@ -4430,7 +4431,11 @@ def evaluation_loop(
model = self.model_wrapped
if _prepare_pipeline_inputs_func is not None:
model._prepare_pipeline_inputs_func = _prepare_pipeline_inputs_func
elif is_paddlefleet_available() and isinstance(self.model, GPTModel):
elif (
is_paddlefleet_available()
and isinstance(self.model, GPTModel)
or (isinstance(self.model, LoRAModel) and isinstance(self.model.model, GPTModel))
):
model = self.model_wrapped
else:
model = self.model
Expand Down
2 changes: 1 addition & 1 deletion paddleformers/transformers/auto/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def _get_model_class_from_config(cls, pretrained_model_name_or_path, config_file
init_class = cls._name_mapping[model_name + "_Import_Class"]
class_name = cls._name_mapping[init_class]
import_class = importlib.import_module(f"paddleformers.transformers.{class_name}.modeling")
if is_lora:
if is_lora and class_name in ["qwen3_vl_moe"]:
try:
model_class = getattr(import_class, init_class + "Decapitated")
return model_class
Expand Down
19 changes: 19 additions & 0 deletions paddleformers/transformers/qwen3_vl/modeling_fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,25 @@ def _gen_inv_aoa_config(cls, config: Qwen3VLConfig):

return aoa_config

@classmethod
def _gen_lora_inv_aoa_config(cls, config: Qwen3VLConfig):
    """Build inverse AoA rename statements for per-layer LoRA weights.

    Maps LoRA A/B weight names from the fleet pipeline naming scheme
    (``model.language_model.<layer_id + 1>.*`` — note the +1 offset;
    presumably slot 0 is occupied by a non-decoder stage, TODO confirm)
    back to the standard layered naming scheme
    (``model.language_model.layers.<layer_id>.*``).

    Returns:
        list[str]: one ``"src -> dst"`` statement per LoRA tensor.
    """
    # Fused projection modules that carry LoRA adapters, listed in the
    # same order the statements must be emitted.
    lora_modules = (
        "mlp.down_proj",
        "mlp.up_gate_proj",
        "self_attn.o_proj",
        "self_attn.qkv_proj",
    )
    aoa_statements = []
    for layer_id in range(config.text_config.num_hidden_layers):
        src_prefix = f"model.language_model.{layer_id + 1}"
        dst_prefix = f"model.language_model.layers.{layer_id}"
        for module in lora_modules:
            for lora_suffix in ("lora_A", "lora_B"):
                aoa_statements.append(
                    f"{src_prefix}.{module}.{lora_suffix} -> {dst_prefix}.{module}.{lora_suffix}"
                )
    return aoa_statements


class Qwen3VLModel(Qwen3VLPretrainedModelFleet):
config_class = Qwen3VLConfig
Expand Down
6 changes: 4 additions & 2 deletions scripts/regression/test_dpo_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@
DPO_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

DPO_LORA_EXCEPTED_LOSS = 0.693147
DPO_LORA_RESUME_EXCEPTED_LOSS = 0.691905
DPO_LORA_RESUME_EXCEPTED_LOSS = 0.693106
DPO_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

DPO_FULL_TP_PP_EXCEPTED_LOSS = 0.693147
DPO_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 0.69417
DPO_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

DPO_LORA_TP_PP_EXCEPTED_LOSS = 0.693147
DPO_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 0.693257
DPO_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 0.693146
DPO_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 133362, 30625, 95331, 4198]]

DPO_FC_EXCEPTED_LOSS = 0.694
Expand Down Expand Up @@ -185,6 +185,8 @@ def test_dpo_lora(self):
"max_steps": MAX_STEPS,
"save_steps": SAVE_STEPS,
"sharding": "stage1",
"fuse_attention_qkv": "true",
"fuse_attention_ffn": "true",
"template": TEMPLATE,
}
config_path = os.path.join(CONFIG_PATH, "lora.yaml")
Expand Down
8 changes: 4 additions & 4 deletions scripts/regression/test_pt_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@
PT_FULL_RESUME_EXCEPTED_LOSS = 12.830642
PT_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

PT_LORA_EXCEPTED_LOSS = 12.832637
PT_LORA_RESUME_EXCEPTED_LOSS = 12.832492
PT_LORA_EXCEPTED_LOSS = 12.830773
PT_LORA_RESUME_EXCEPTED_LOSS = 12.83066
PT_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 133362, 30625, 95331, 4198]]

PT_FULL_TP_PP_EXCEPTED_LOSS = 12.83085
PT_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 12.830748
PT_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

PT_LORA_TP_PP_EXCEPTED_LOSS = 12.832589
PT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.832575
PT_LORA_TP_PP_EXCEPTED_LOSS = 12.830850
PT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.830851
PT_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

PT_FC_EXCEPTED_LOSS = 11.931005
Expand Down
8 changes: 4 additions & 4 deletions scripts/regression/test_sft_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@
SFT_FULL_RESUME_EXCEPTED_LOSS = 12.717552
SFT_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

SFT_LORA_EXCEPTED_LOSS = 12.725744
SFT_LORA_RESUME_EXCEPTED_LOSS = 12.72543
SFT_LORA_EXCEPTED_LOSS = 12.718987
SFT_LORA_RESUME_EXCEPTED_LOSS = 12.717308
SFT_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

SFT_FULL_TP_PP_EXCEPTED_LOSS = 12.789069
SFT_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 12.789183
SFT_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

SFT_LORA_TP_PP_EXCEPTED_LOSS = 12.794643
SFT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.794622
SFT_LORA_TP_PP_EXCEPTED_LOSS = 12.789069
SFT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.788974
SFT_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

SFT_FC_EXCEPTED_LOSS = 12.936313
Expand Down
73 changes: 73 additions & 0 deletions tests/config/ci/glm45_dpo_lora.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
### data
# erniekit-format JSONL pairwise preference data; paths are rewritten by the
# CI driver script (glm45_dpo_lora.sh) to point at the runner's local copies.
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./dpo_train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./dpo_eval.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 2048
packing: true
mix_strategy: concat

### model
# model_name_or_path is also overridden by the driver script via yq.
model_name_or_path: zai-org/GLM-4.5-Air-Base/
#attn_impl: sdpa
attn_impl: flashmask

### finetuning
# base
stage: DPO
params_dtype: bfloat16
fine_tuning: lora
seed: 23
bf16: true
fp16_opt_level: O2
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
#use_attn_mask_startend_row_indices: false
use_attn_mask_startend_row_indices: true
#tensor_model_parallel_size: 2
# Short smoke run: 10 steps; eval/save steps set high so neither triggers.
max_steps: 10
eval_steps: 100000
evaluation_strategy: steps
#pipeline_model_parallel_size: 1
continue_training: true
save_steps: 100000
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/glm4_hf_dpo_lora_ckpts
disable_tqdm: true
eval_accumulation_steps: 16
# Parallelism layout: TP4 x PP2 on 8 cards, with EP4 expert parallelism.
tensor_model_parallel_size: 4
pipeline_model_parallel_size: 2
expert_model_parallel_size: 4
use_expert_parallel: true
sequence_parallel: true

moe_group: mp
moe_token_dispatcher_type: "deepep"
gated_linear_unit: true
hidden_dropout_prob: 0.0
attention_dropout: 0.0
recompute_granularity: ""
amp_master_grad: true

# train
warmup_steps: 20
learning_rate: 1.0e-5
save_checkpoint_format: flex_checkpoint
load_checkpoint_format: flex_checkpoint
moe_router_force_load_balancing: false
clear_every_step_cache: true
partial_send_recv: false
#use_cpu_initialization: true
fuse_attention_qkv: true
fuse_attention_ffn: true

# Truncated model for CI speed: only 3 decoder layers plus 1 empty tail layer.
num_hidden_layers: 3
num_empty_layers_add_in_tail: 1
110 changes: 110 additions & 0 deletions tests/integration_test/glm45_dpo_lora.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integration test: GLM4.5 DPO + LoRA multi-card training followed by a
# loss-precision check against a downloaded ground-truth loss file.
set -exo pipefail
export root_dir=$(pwd)

# Activate the prepared Python environment if the CI image ships one.
if [ -f 'PaddleFleet/.venv/bin/activate' ]; then
source PaddleFleet/.venv/bin/activate
fi

cd $root_dir/glm45_fleet
export cur_dir=$(pwd)

# prepare dpo data
wget https://paddle-qa.bj.bcebos.com/fleet/fleet_dpo.tar
tar -xf fleet_dpo.tar

config_dpo_yaml=$root_dir/PaddleFormers/tests/config/ci/glm45_dpo_lora.yaml

# NOTE(review): config_json is assigned but never used below — confirm intent.
config_json=$CACHE_DIR/glm45/GLM-4.5-Air/config.json

# Rewrite the checked-in YAML in place so dataset/model/log/output paths point
# at this runner's local cache and working directory (yq reads env via strenv).
yq '.train_dataset_path = strenv(cur_dir) + "/dpo_data/dpo_train.jsonl"
| .eval_dataset_path = strenv(cur_dir) + "/dpo_data/dpo_eval.jsonl"
| .model_name_or_path = strenv(CACHE_DIR) + "/zai-org/GLM-4.5-Air-Base"
| .logging_dir = strenv(cur_dir) + "/glm_full_dpo_lora_vdl_log"
| .output_dir = strenv(cur_dir) + "/checkpoints/glm_full_dpo_lora_ckpts"' \
$config_dpo_yaml > ${config_dpo_yaml}.tmp
mv ${config_dpo_yaml}.tmp $config_dpo_yaml

rm -rf ./outputs
rm -rf paddleformers_dist_log
master=$(hostname -i)
port=36677

export FLAGS_use_stride_compute_kernel=False
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

unset http_proxy https_proxy

# Deterministic kernels so the loss curve is reproducible across runs.
export FLAGS_embedding_deterministic=1
export FLAGS_cudnn_deterministic=1

log_file=glm45_dpo_lora.txt
gt_loss_file=glm45_dpo_lora_multi_card_gt_loss.txt

# From here on, inspect exit codes manually instead of aborting on error.
# pipefail is still in effect, so $? below reflects the training command
# rather than the trailing tee.
set +e
NNODES=1 MASTER_ADDR=$master MASTER_PORT=$port coverage run $(which paddleformers-cli) train $config_dpo_yaml 2>&1 | tee ./${log_file}
sft_exit_code=$?
# A non-zero exit may still mean training finished (e.g. teardown noise);
# accept the run if the log contains the eval-metrics marker.
# NOTE(review): the failure echo mentions 'Training completed' but the marker
# searched for is "***** eval metrics *****" — consider aligning the message.
if [ $sft_exit_code -ne 0 ]; then
echo "GLM4.5 multi-cards training failed, try to check the log file"
python $root_dir/PaddleFormers/tests/check_log_for_exitcode.py ./${log_file} "***** eval metrics *****"
sft_check_exit_code=$?
if [ $sft_check_exit_code -ne 0 ]; then
echo "Failed to find 'Training completed' in log file."
exit 1
else
echo "Log check passed."
fi
fi

export repo_name=$(echo $GITHUB_REPO_NAME | awk -F'/' '{print $2}')
# if [[ "${PP}" == "rel" ]]; then
# export pppatch="_PPrel"
# fi
# if [[ "${PF}" == rel* ]]; then
# export pfpatch="rel"
# fi
# Fetch the ground-truth losses for this repo/patch combination; absence means
# no precision baseline has been registered for this model yet.
wget --no-proxy --no-check-certificate https://xly-devops.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}_latest/${gt_loss_file}
if [ $? -ne 0 ]; then
echo "To request precision checks for new models, please contact swgu98."
exit 1
fi

# e.g. glm45_dpo_lora.txt -> glm45_dpo_lora_loss.txt
log_loss_file=${log_file%.*}_loss.${log_file##*.}
# Extract the first 10 logged losses and compare against the ground truth.
python $root_dir/PaddleFormers/tests/integration_test/check_loss.py \
--compare_step 10 \
--log_file ./${log_file} \
--log_loss_file ./${log_loss_file} \
--gt_file ./${gt_loss_file}

# On mismatch: require an explicit precision-change approval, then publish the
# new losses as the PR-scoped ground truth so follow-up runs compare to them.
if [ $? -ne 0 ]; then
pushd $root_dir/PaddleFormers
source /root/proxy
bash $root_dir/PaddleFormers/tests/integration_test/check_precision_approval.sh
if [ $? -ne 0 ]; then
echo -e "\033[31mThe precision has been changed and requires approvals.\033[0m"
exit 1
fi
popd
rm ${gt_loss_file} && mv ${log_loss_file} ${gt_loss_file}
# Ensure the precision manifest exists for this PR (fall back to the latest
# published manifest, then mirror it into the PR-scoped BOS path).
if [ ! -f precision_list.txt ]; then
wget --no-proxy --no-check-certificate https://paddle-github-action.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}/precision_list.txt
if [ $? -ne 0 ]; then
wget --no-proxy --no-check-certificate https://xly-devops.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}_latest/precision_list.txt
python $root_dir/bos/BosClient.py precision_list.txt paddle-github-action/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}
fi
fi
python $root_dir/bos/BosClient.py ${gt_loss_file} paddle-github-action/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}
fi
Loading