Skip to content

Commit ce4d53d

Browse files
committed
Rename modelopt modules and modify vllm patch logic
1 parent 6e815f8 commit ce4d53d

File tree

7 files changed

+37
-878
lines changed

7 files changed

+37
-878
lines changed

verl/utils/modelopt/__init__.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,26 +19,38 @@
1919
Supports NVFP4 quantization with Megatron QAT training + vLLM low-precision inference.
2020
2121
Module Structure:
22-
- qat.py: QAT quantization config, apply_qat, QuantizationMetadata
22+
- quantize.py: Quantization config builder, apply_qat, QuantizationMetadata
2323
- weight_processor.py: QATWeightPostProcessor for converting QAT weights to quantized format
24-
- vllm_patch.py: vLLM monkey patches for NVFP4 inference (Linear, MoE, KV Cache)
24+
- vllm_modelopt_patch.py: vLLM monkey patches for ModelOpt NVFP4 inference (Linear, MoE, KV Cache)
2525
2626
Usage:
2727
# Training side
2828
from verl.utils.modelopt import apply_qat, QATWeightPostProcessor
2929
30-
# Inference side
31-
from verl.utils.modelopt import apply_vllm_modelopt_patches
30+
# Inference side (dynamic weight reload lifecycle)
31+
from verl.utils.modelopt import apply_modelopt_nvfp4_patches, prepare_modelopt_for_weight_reload, modelopt_process_weights_after_loading
3232
"""
3333

34-
from verl.utils.modelopt.qat import NVFP4_WEIGHT_ONLY_CFG, QuantizationMetadata, apply_qat
35-
from verl.utils.modelopt.vllm_patch import apply_vllm_modelopt_patches
34+
from verl.utils.modelopt.quantize import (
35+
# DEFAULT_IGNORE_PATTERNS,
36+
QuantizationMetadata,
37+
apply_qat,
38+
build_quantize_config,
39+
)
40+
from verl.utils.modelopt.vllm_modelopt_patch import (
41+
apply_modelopt_nvfp4_patches,
42+
modelopt_process_weights_after_loading,
43+
prepare_modelopt_for_weight_reload,
44+
)
3645
from verl.utils.modelopt.weight_processor import QATWeightPostProcessor
3746

47+
3848
__all__ = [
39-
"NVFP4_WEIGHT_ONLY_CFG",
49+
"build_quantize_config",
4050
"apply_qat",
4151
"QuantizationMetadata",
4252
"QATWeightPostProcessor",
43-
"apply_vllm_modelopt_patches",
53+
"apply_modelopt_nvfp4_patches",
54+
"prepare_modelopt_for_weight_reload",
55+
"modelopt_process_weights_after_loading",
4456
]

verl/utils/modelopt/qat.py

Lines changed: 0 additions & 81 deletions
This file was deleted.

0 commit comments

Comments
 (0)