Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/fleet-model-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,21 @@ jobs:
fi
'

- name: GLM4.5 dpo_lora
if: (success() || failure()) && steps.formers_install.conclusion == 'success'
run: |
docker exec -t ${{ env.container_name }} /bin/bash -ce '
source /root/proxy
timeout 5m bash -x PaddleFormers/tests/integration_test/glm45_dpo_lora.sh
glm45_exit_code=$?
if [[ "$glm45_exit_code" != "0" ]]; then
echo -e "::error:: \033[31mIntegration test failed: GLM4.5 dpo lora.\033[0m"
exit 1
else
echo -e "\033[32mIntegration test succeeded: GLM4.5 dpo lora.\033[0m"
fi
'

- name: GLM4.5 pre-train (FP8)
if: (success() || failure()) && steps.formers_install.conclusion == 'success'
run: |
Expand Down
22 changes: 16 additions & 6 deletions paddleformers/cli/train/dpo/dpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from paddle.distributed import fleet

from paddleformers.nn.criterion import CriterionLayer
from paddleformers.peft import LoRAModel
from paddleformers.peft.lora.lora_model import AVAILABLE_LAYERS
from paddleformers.trainer import Trainer
from paddleformers.transformers.model_utils import unwrap_model
Expand Down Expand Up @@ -227,8 +228,14 @@ def _wrap_ref_model(self, model):

def _wrap_model(self, model, training=True):
"""Wrap model."""
if is_paddlefleet_available() and isinstance(model, PaddleFleetPipelineLayer):
model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
if is_paddlefleet_available() and (
isinstance(model, PaddleFleetPipelineLayer)
or (isinstance(model, LoRAModel) and isinstance(model.model, PaddleFleetPipelineLayer))
):
if isinstance(model, LoRAModel):
model.model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
else:
model._prepare_pipeline_inputs_func = _prepare_pipeline_dpo_inputs_func_fleet
model = super()._wrap_model(model, training)
return model

Expand Down Expand Up @@ -351,7 +358,8 @@ def fleet_prediction_pipeline_step(
with self.autocast_smart_context_manager():
model.eval_batch(data=[inputs, labels], compute_loss=True)
self.enable_lora(model)
model._p2p_helper.clear_meta_cache()
if hasattr(model, "_p2p_helper"):
model._p2p_helper.clear_meta_cache()
model.train()
else:
ref_model = self.ref_model_wrapped
Expand All @@ -367,8 +375,9 @@ def fleet_prediction_pipeline_step(
else:
reference_chosen_logps = [paddle.zeros([1]) for _ in range(model.accumulate_steps)]
reference_rejected_logps = [paddle.zeros([1]) for _ in range(model.accumulate_steps)]
if ref_model.is_pipeline_last_stage(ignore_virtual=ref_model._layers._num_virtual_pipeline_stages > 1):
if is_paddlefleet_available() and isinstance(ref_model, PaddleFleetParallelBase):

if model.is_pipeline_last_stage(ignore_virtual=model._layers._num_virtual_pipeline_stages > 1):
if is_paddlefleet_available() and isinstance(model, PaddleFleetParallelBase):
labels = fleet_merge_dpo_labels(labels, (reference_chosen_logps, reference_rejected_logps))
else:
labels = labels[:-2] + (reference_chosen_logps, reference_rejected_logps)
Expand Down Expand Up @@ -567,7 +576,8 @@ def training_pipeline_step(self, model, inputs):
with self.autocast_smart_context_manager():
model.eval_batch(data=[inputs, labels], compute_loss=True)
self.enable_lora(model)
model._p2p_helper.clear_meta_cache()
if hasattr(model, "_p2p_helper"):
model._p2p_helper.clear_meta_cache()
model.train()
else:
ref_model = self.ref_model_wrapped
Expand Down
2 changes: 2 additions & 0 deletions paddleformers/peft/lora/lora_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,8 @@ def save_pretrained(self, save_directory: str, merge_tensor_parallel: bool = Fal
if n_replace > 0:
aoa_config["aoa_statements"].append(f"{key} -> {key_new}")
break
if hasattr(self.model, "_gen_lora_inv_aoa_config"):
aoa_config["aoa_statements"] += self.model._gen_lora_inv_aoa_config(self.model.config)

HFFormatFullParamSaver(model_to_save, aoa_config).save_checkpoint(
save_directory, max_shard_size, save_peft=True
Expand Down
7 changes: 6 additions & 1 deletion paddleformers/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,6 +1210,7 @@ def bf16_filtered_sharded_state_dict(sharded_state_dict):
model_sharded_state_dict = bf16_filtered_sharded_state_dict(model_sharded_state_dict)
# NOTE(xingmingyyj) When saving model states only in float32 format, we assume that users
# will not use AOA to change the mapping relationships among these float32 weights.

dist.load_state_dict(
model_sharded_state_dict,
model_states_path,
Expand Down Expand Up @@ -4430,7 +4431,11 @@ def evaluation_loop(
model = self.model_wrapped
if _prepare_pipeline_inputs_func is not None:
model._prepare_pipeline_inputs_func = _prepare_pipeline_inputs_func
elif is_paddlefleet_available() and isinstance(self.model, GPTModel):
elif (
is_paddlefleet_available()
and isinstance(self.model, GPTModel)
or (isinstance(self.model, LoRAModel) and isinstance(self.model.model, GPTModel))
):
model = self.model_wrapped
else:
model = self.model
Expand Down
2 changes: 1 addition & 1 deletion paddleformers/transformers/auto/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def _get_model_class_from_config(cls, pretrained_model_name_or_path, config_file
init_class = cls._name_mapping[model_name + "_Import_Class"]
class_name = cls._name_mapping[init_class]
import_class = importlib.import_module(f"paddleformers.transformers.{class_name}.modeling")
if is_lora:
if is_lora and class_name in ["qwen3_vl_moe"]:
try:
model_class = getattr(import_class, init_class + "Decapitated")
return model_class
Expand Down
19 changes: 19 additions & 0 deletions paddleformers/transformers/qwen3_vl/modeling_fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,25 @@ def _gen_inv_aoa_config(cls, config: Qwen3VLConfig):

return aoa_config

@classmethod
def _gen_lora_inv_aoa_config(cls, config: Qwen3VLConfig):
    """Build inverse AoA rename statements for per-layer LoRA weights.

    Maps LoRA A/B weight names from the fleet pipeline naming scheme
    (``model.language_model.<layer_id + 1>.*`` — note the +1 offset;
    presumably slot 0 is occupied by a non-decoder stage, TODO confirm)
    back to the standard layered naming scheme
    (``model.language_model.layers.<layer_id>.*``).

    Returns:
        list[str]: one ``"src -> dst"`` statement per LoRA tensor.
    """
    # Fused projection modules that carry LoRA adapters, listed in the
    # same order the statements must be emitted.
    lora_modules = (
        "mlp.down_proj",
        "mlp.up_gate_proj",
        "self_attn.o_proj",
        "self_attn.qkv_proj",
    )
    aoa_statements = []
    for layer_id in range(config.text_config.num_hidden_layers):
        src_prefix = f"model.language_model.{layer_id + 1}"
        dst_prefix = f"model.language_model.layers.{layer_id}"
        for module in lora_modules:
            for lora_suffix in ("lora_A", "lora_B"):
                aoa_statements.append(
                    f"{src_prefix}.{module}.{lora_suffix} -> {dst_prefix}.{module}.{lora_suffix}"
                )
    return aoa_statements


class Qwen3VLModel(Qwen3VLPretrainedModelFleet):
config_class = Qwen3VLConfig
Expand Down
6 changes: 4 additions & 2 deletions scripts/regression/test_dpo_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@
DPO_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

DPO_LORA_EXCEPTED_LOSS = 0.693147
DPO_LORA_RESUME_EXCEPTED_LOSS = 0.691905
DPO_LORA_RESUME_EXCEPTED_LOSS = 0.693106
DPO_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

DPO_FULL_TP_PP_EXCEPTED_LOSS = 0.693147
DPO_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 0.69417
DPO_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

DPO_LORA_TP_PP_EXCEPTED_LOSS = 0.693147
DPO_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 0.693257
DPO_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 0.693146
DPO_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 133362, 30625, 95331, 4198]]

DPO_FC_EXCEPTED_LOSS = 0.694
Expand Down Expand Up @@ -185,6 +185,8 @@ def test_dpo_lora(self):
"max_steps": MAX_STEPS,
"save_steps": SAVE_STEPS,
"sharding": "stage1",
"fuse_attention_qkv": "true",
"fuse_attention_ffn": "true",
"template": TEMPLATE,
}
config_path = os.path.join(CONFIG_PATH, "lora.yaml")
Expand Down
8 changes: 4 additions & 4 deletions scripts/regression/test_pt_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@
PT_FULL_RESUME_EXCEPTED_LOSS = 12.830642
PT_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

PT_LORA_EXCEPTED_LOSS = 12.832637
PT_LORA_RESUME_EXCEPTED_LOSS = 12.832492
PT_LORA_EXCEPTED_LOSS = 12.830773
PT_LORA_RESUME_EXCEPTED_LOSS = 12.83066
PT_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 133362, 30625, 95331, 4198]]

PT_FULL_TP_PP_EXCEPTED_LOSS = 12.83085
PT_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 12.830748
PT_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

PT_LORA_TP_PP_EXCEPTED_LOSS = 12.832589
PT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.832575
PT_LORA_TP_PP_EXCEPTED_LOSS = 12.830850
PT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.830851
PT_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

PT_FC_EXCEPTED_LOSS = 11.931005
Expand Down
8 changes: 4 additions & 4 deletions scripts/regression/test_sft_tiny-random-glm4moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@
SFT_FULL_RESUME_EXCEPTED_LOSS = 12.717552
SFT_FULL_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

SFT_LORA_EXCEPTED_LOSS = 12.725744
SFT_LORA_RESUME_EXCEPTED_LOSS = 12.72543
SFT_LORA_EXCEPTED_LOSS = 12.718987
SFT_LORA_RESUME_EXCEPTED_LOSS = 12.717308
SFT_LORA_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

SFT_FULL_TP_PP_EXCEPTED_LOSS = 12.789069
SFT_FULL_TP_PP_RESUME_EXCEPTED_LOSS = 12.789183
SFT_FULL_TP_PP_EXCEPTED_RESULT = [[10564, 10564, 102954, 47231, 47231, 47231, 47231, 47231, 47231, 47231]]

SFT_LORA_TP_PP_EXCEPTED_LOSS = 12.794643
SFT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.794622
SFT_LORA_TP_PP_EXCEPTED_LOSS = 12.789069
SFT_LORA_TP_PP_RESUME_EXCEPTED_LOSS = 12.788974
SFT_LORA_TP_PP_EXCEPTED_RESULT = [[51172, 37927, 96130, 27654, 133362, 95331, 27654, 133362, 115845, 115845]]

SFT_FC_EXCEPTED_LOSS = 12.936313
Expand Down
73 changes: 73 additions & 0 deletions tests/config/ci/glm45_dpo_lora.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
### data
# erniekit-format JSONL pairwise preference data; paths are rewritten by the
# CI driver script (glm45_dpo_lora.sh) to point at the runner's local copies.
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./dpo_train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./dpo_eval.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 2048
packing: true
mix_strategy: concat

### model
# model_name_or_path is also overridden by the driver script via yq.
model_name_or_path: zai-org/GLM-4.5-Air-Base/
#attn_impl: sdpa
attn_impl: flashmask

### finetuning
# base
stage: DPO
params_dtype: bfloat16
fine_tuning: lora
seed: 23
bf16: true
fp16_opt_level: O2
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
#use_attn_mask_startend_row_indices: false
use_attn_mask_startend_row_indices: true
#tensor_model_parallel_size: 2
# Short smoke run: 10 steps; eval/save steps set high so neither triggers.
max_steps: 10
eval_steps: 100000
evaluation_strategy: steps
#pipeline_model_parallel_size: 1
continue_training: true
save_steps: 100000
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/glm4_hf_dpo_lora_ckpts
disable_tqdm: true
eval_accumulation_steps: 16
# Parallelism layout: TP4 x PP2 on 8 cards, with EP4 expert parallelism.
tensor_model_parallel_size: 4
pipeline_model_parallel_size: 2
expert_model_parallel_size: 4
use_expert_parallel: true
sequence_parallel: true

moe_group: mp
moe_token_dispatcher_type: "deepep"
gated_linear_unit: true
hidden_dropout_prob: 0.0
attention_dropout: 0.0
recompute_granularity: ""
amp_master_grad: true

# train
warmup_steps: 20
learning_rate: 1.0e-5
save_checkpoint_format: flex_checkpoint
load_checkpoint_format: flex_checkpoint
moe_router_force_load_balancing: false
clear_every_step_cache: true
partial_send_recv: false
#use_cpu_initialization: true
fuse_attention_qkv: true
fuse_attention_ffn: true

# Truncated model for CI speed: only 3 decoder layers plus 1 empty tail layer.
num_hidden_layers: 3
num_empty_layers_add_in_tail: 1
110 changes: 110 additions & 0 deletions tests/integration_test/glm45_dpo_lora.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integration test: GLM4.5 DPO + LoRA multi-card training followed by a
# loss-precision check against a downloaded ground-truth loss file.
set -exo pipefail
export root_dir=$(pwd)

# Activate the prepared Python environment if the CI image ships one.
if [ -f 'PaddleFleet/.venv/bin/activate' ]; then
source PaddleFleet/.venv/bin/activate
fi

cd $root_dir/glm45_fleet
export cur_dir=$(pwd)

# prepare dpo data
wget https://paddle-qa.bj.bcebos.com/fleet/fleet_dpo.tar
tar -xf fleet_dpo.tar

config_dpo_yaml=$root_dir/PaddleFormers/tests/config/ci/glm45_dpo_lora.yaml

# NOTE(review): config_json is assigned but never used below — confirm intent.
config_json=$CACHE_DIR/glm45/GLM-4.5-Air/config.json

# Rewrite the checked-in YAML in place so dataset/model/log/output paths point
# at this runner's local cache and working directory (yq reads env via strenv).
yq '.train_dataset_path = strenv(cur_dir) + "/dpo_data/dpo_train.jsonl"
| .eval_dataset_path = strenv(cur_dir) + "/dpo_data/dpo_eval.jsonl"
| .model_name_or_path = strenv(CACHE_DIR) + "/zai-org/GLM-4.5-Air-Base"
| .logging_dir = strenv(cur_dir) + "/glm_full_dpo_lora_vdl_log"
| .output_dir = strenv(cur_dir) + "/checkpoints/glm_full_dpo_lora_ckpts"' \
$config_dpo_yaml > ${config_dpo_yaml}.tmp
mv ${config_dpo_yaml}.tmp $config_dpo_yaml

rm -rf ./outputs
rm -rf paddleformers_dist_log
master=$(hostname -i)
port=36677

export FLAGS_use_stride_compute_kernel=False
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

unset http_proxy https_proxy

# Deterministic kernels so the loss curve is reproducible across runs.
export FLAGS_embedding_deterministic=1
export FLAGS_cudnn_deterministic=1

log_file=glm45_dpo_lora.txt
gt_loss_file=glm45_dpo_lora_multi_card_gt_loss.txt

# From here on, inspect exit codes manually instead of aborting on error.
# pipefail is still in effect, so $? below reflects the training command
# rather than the trailing tee.
set +e
NNODES=1 MASTER_ADDR=$master MASTER_PORT=$port coverage run $(which paddleformers-cli) train $config_dpo_yaml 2>&1 | tee ./${log_file}
sft_exit_code=$?
# A non-zero exit may still mean training finished (e.g. teardown noise);
# accept the run if the log contains the eval-metrics marker.
# NOTE(review): the failure echo mentions 'Training completed' but the marker
# searched for is "***** eval metrics *****" — consider aligning the message.
if [ $sft_exit_code -ne 0 ]; then
echo "GLM4.5 multi-cards training failed, try to check the log file"
python $root_dir/PaddleFormers/tests/check_log_for_exitcode.py ./${log_file} "***** eval metrics *****"
sft_check_exit_code=$?
if [ $sft_check_exit_code -ne 0 ]; then
echo "Failed to find 'Training completed' in log file."
exit 1
else
echo "Log check passed."
fi
fi

export repo_name=$(echo $GITHUB_REPO_NAME | awk -F'/' '{print $2}')
# if [[ "${PP}" == "rel" ]]; then
# export pppatch="_PPrel"
# fi
# if [[ "${PF}" == rel* ]]; then
# export pfpatch="rel"
# fi
# Fetch the ground-truth losses for this repo/patch combination; absence means
# no precision baseline has been registered for this model yet.
wget --no-proxy --no-check-certificate https://xly-devops.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}_latest/${gt_loss_file}
if [ $? -ne 0 ]; then
echo "To request precision checks for new models, please contact swgu98."
exit 1
fi

# e.g. glm45_dpo_lora.txt -> glm45_dpo_lora_loss.txt
log_loss_file=${log_file%.*}_loss.${log_file##*.}
# Extract the first 10 logged losses and compare against the ground truth.
python $root_dir/PaddleFormers/tests/integration_test/check_loss.py \
--compare_step 10 \
--log_file ./${log_file} \
--log_loss_file ./${log_loss_file} \
--gt_file ./${gt_loss_file}

# On mismatch: require an explicit precision-change approval, then publish the
# new losses as the PR-scoped ground truth so follow-up runs compare to them.
if [ $? -ne 0 ]; then
pushd $root_dir/PaddleFormers
source /root/proxy
bash $root_dir/PaddleFormers/tests/integration_test/check_precision_approval.sh
if [ $? -ne 0 ]; then
echo -e "\033[31mThe precision has been changed and requires approvals.\033[0m"
exit 1
fi
popd
rm ${gt_loss_file} && mv ${log_loss_file} ${gt_loss_file}
# Ensure the precision manifest exists for this PR (fall back to the latest
# published manifest, then mirror it into the PR-scoped BOS path).
if [ ! -f precision_list.txt ]; then
wget --no-proxy --no-check-certificate https://paddle-github-action.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}/precision_list.txt
if [ $? -ne 0 ]; then
wget --no-proxy --no-check-certificate https://xly-devops.cdn.bcebos.com/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}_latest/precision_list.txt
python $root_dir/bos/BosClient.py precision_list.txt paddle-github-action/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}
fi
fi
python $root_dir/bos/BosClient.py ${gt_loss_file} paddle-github-action/PaddleFleet/precision/${repo_name}${pfpatch}${pppatch}/${PR_ID}
fi
Loading