|
19 | 19 | Supports NVFP4 quantization with Megatron QAT training + vLLM low-precision inference. |
20 | 20 |
|
21 | 21 | Module Structure: |
22 | | -- qat.py: QAT quantization config, apply_qat, QuantizationMetadata |
| 22 | +- quantize.py: Quantization config builder, apply_qat, QuantizationMetadata |
23 | 23 | - weight_processor.py: QATWeightPostProcessor for converting QAT weights to quantized format |
24 | | -- vllm_patch.py: vLLM monkey patches for NVFP4 inference (Linear, MoE, KV Cache) |
| 24 | +- vllm_modelopt_patch.py: vLLM monkey patches for ModelOpt NVFP4 inference (Linear, MoE, KV Cache) |
25 | 25 |
|
26 | 26 | Usage: |
27 | 27 | # Training side |
28 | 28 | from verl.utils.modelopt import apply_qat, QATWeightPostProcessor |
29 | 29 |
|
30 | | - # Inference side |
31 | | - from verl.utils.modelopt import apply_vllm_modelopt_patches |
| 30 | + # Inference side (dynamic weight reload lifecycle) |
| 31 | + from verl.utils.modelopt import apply_modelopt_nvfp4_patches, prepare_modelopt_for_weight_reload, modelopt_process_weights_after_loading |
32 | 32 | """ |
33 | 33 |
|
34 | | -from verl.utils.modelopt.qat import NVFP4_WEIGHT_ONLY_CFG, QuantizationMetadata, apply_qat |
35 | | -from verl.utils.modelopt.vllm_patch import apply_vllm_modelopt_patches |
| 34 | +from verl.utils.modelopt.quantize import ( |
| 35 | + # DEFAULT_IGNORE_PATTERNS, |
| 36 | + QuantizationMetadata, |
| 37 | + apply_qat, |
| 38 | + build_quantize_config, |
| 39 | +) |
| 40 | +from verl.utils.modelopt.vllm_modelopt_patch import ( |
| 41 | + apply_modelopt_nvfp4_patches, |
| 42 | + modelopt_process_weights_after_loading, |
| 43 | + prepare_modelopt_for_weight_reload, |
| 44 | +) |
36 | 45 | from verl.utils.modelopt.weight_processor import QATWeightPostProcessor |
37 | 46 |
|
| 47 | + |
38 | 48 | __all__ = [ |
39 | | - "NVFP4_WEIGHT_ONLY_CFG", |
| 49 | + "build_quantize_config", |
40 | 50 | "apply_qat", |
41 | 51 | "QuantizationMetadata", |
42 | 52 | "QATWeightPostProcessor", |
43 | | - "apply_vllm_modelopt_patches", |
| 53 | + "apply_modelopt_nvfp4_patches", |
| 54 | + "prepare_modelopt_for_weight_reload", |
| 55 | + "modelopt_process_weights_after_loading", |
44 | 56 | ] |
0 commit comments