Skip to content

Commit 0c15d6b

Browse files
committed
[megatron] fix: patch support newer mcore version
Tested on NVIDIA/Megatron-LM@bbbedbb. Signed-off-by: Hollow Man <hollowman@opensuse.org>
1 parent 37ff251 commit 0c15d6b

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

verl/models/mcore/patch.py

Lines changed: 7 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -258,13 +258,14 @@ def patch_forward(
258258
# Get the query, key and value tensors based on the type of attention -
259259
# self or cross attn.
260260
# query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128]
261-
query, key, value = self.get_query_key_value_tensors(
261+
qkv = self.get_query_key_value_tensors(
262262
hidden_states,
263263
key_value_states,
264264
position_ids,
265265
packed_seq_params,
266266
inference_context=inference_context,
267267
)
268+
query, key, value = qkv[:3]
268269

269270
# ===================================================
270271
# Adjust key, value for inference
@@ -329,7 +330,11 @@ def patch_forward(
329330

330331
return output, bias
331332

332-
MLASelfAttention.get_query_key_value_tensors = patch_get_query_key_value_tensors
333+
# This patch targets mcore 0.12 MLA behavior only.
334+
# For newer mcore, upstream MLA already has packed-seq + CP handling and
335+
# overriding it with the legacy implementation can break RoPE shapes.
336+
if not mcore_ge_013:
337+
MLASelfAttention.get_query_key_value_tensors = patch_get_query_key_value_tensors
333338

334339
MultiLatentAttention.forward = patch_forward
335340

0 commit comments

Comments (0)