Skip to content

Commit 3ac29fb

Browse files
committed
[CI] add long test
Signed-off-by: guozr <guozr1997@hotmail.com>
1 parent: 41d48cb; commit: 3ac29fb

File tree

1 file changed: 10 additions and 4 deletions

1 file changed: 10 additions and 4 deletions

tests/e2e/multicard/2-cards/test_offline_inference_distributed.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -245,14 +245,19 @@ def test_qwen3_dense_prefetch_mlp_weight_tp2(model):
245245
@patch.dict(os.environ, {"ASCEND_AGGREGATE_ENABLE": "1"})
246246
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
247247
def test_deepseek3_2_w8a8_pruning_mtp_tp2_ep():
248-
example_prompts = [
249-
"Hello, my name is",
248+
short_example_prompts = [
249+
"Hello ",
250250
]
251-
max_tokens = 5
251+
# "max_position_embeddings": 163840,
252+
long_example_prompts = [
253+
"Hello " * (163839 - 500) + "Hello"
254+
]
255+
max_tokens = 500
252256
with VllmRunner("vllm-ascend/DeepSeek-V3.2-W8A8-Pruning",
253257
tensor_parallel_size=2,
254258
quantization="ascend",
255259
enable_expert_parallel=True,
260+
max_model_len=163840,
256261
compilation_config={
257262
"cudagraph_capture_sizes": [3, 6, 9, 12],
258263
"cudagraph_mode": "FULL_DECODE_ONLY"
@@ -266,7 +271,8 @@ def test_deepseek3_2_w8a8_pruning_mtp_tp2_ep():
266271
},
267272
reasoning_parser="deepseek_v3",
268273
tokenizer_mode="deepseek_v32") as vllm_model:
269-
vllm_model.generate_greedy(example_prompts, max_tokens)
274+
vllm_model.generate_greedy(short_example_prompts, max_tokens)
275+
vllm_model.generate_greedy(long_example_prompts, max_tokens)
270276

271277

272278
@pytest.mark.parametrize("model", QWEN_W4A4_MODELS)

0 commit comments

Comments
 (0)