Skip to content

Commit ea1ce8c

Browse files
committed
fix param
Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
1 parent b07c6b3 commit ea1ce8c

File tree

1 file changed

+22
-16
lines changed

1 file changed

+22
-16
lines changed

tests/e2e/weekly/single_node/models/test_qwen3_30b_acc.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from tests.e2e.conftest import RemoteOpenAIServer, MooncakeLauncher
2525
from tools.aisbench import run_aisbench_cases, maybe_download_from_modelscope
2626

27-
2827
MODELS = [
2928
"vllm-ascend/Qwen3-30B-A3B-W8A8",
3029
]
@@ -93,30 +92,37 @@ async def test_models(model: str, tp_size: int) -> None:
9392
"kv_connector": "AscendStoreConnector",
9493
"kv_role": "kv_both",
9594
"kv_connector_extra_config": {
96-
"register_buffer": True,
97-
"use_layerwise": False,
98-
"mooncake_rpc_port":"0"
95+
"register_buffer": True,
96+
"use_layerwise": False,
97+
"mooncake_rpc_port": "0"
9998
}
10099
}
101-
speculative_config = {"method": "eagle3","model": eagle_model, "num_speculative_tokens": 3}
100+
speculative_config = {
101+
"method": "eagle3",
102+
"model": eagle_model,
103+
"num_speculative_tokens": 3
104+
}
102105
server_args = [
103-
"--trust-remote-code", "--max-num-seqs", "100", "--max-model-len", "37364",
104-
"--max-num-batched-tokens", "16384", "--tensor-parallel-size",
106+
"--trust-remote-code", "--max-num-seqs", "100", "--max-model-len",
107+
"37364", "--max-num-batched-tokens", "16384", "--tensor-parallel-size",
105108
str(tp_size), "--enable-expert-parallel", "--port",
106-
str(port), "--distributed_executor_backend", "mp", "--async-scheduling", "True",
107-
"--quantization", "ascend", "--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}',
108-
"--gpu-memory-utilization", "0.95", "--speculative-config", json.dumps(speculative_config),
109-
"--kv-transfer-config", json.dumps(kv_transfer_config)
109+
str(port), "--distributed_executor_backend", "mp",
110+
"--async-scheduling", "--quantization", "ascend",
111+
"--compilation-config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}',
112+
"--gpu-memory-utilization", "0.95", "--speculative-config",
113+
json.dumps(speculative_config), "--kv-transfer-config",
114+
json.dumps(kv_transfer_config)
110115
]
111116
request_keyword_args: dict[str, Any] = {
112117
**api_keyword_args,
113118
}
114-
with MooncakeLauncher(mooncake_port, mooncake_metrics_port) as mooncake_server:
119+
with MooncakeLauncher(mooncake_port,
120+
mooncake_metrics_port) as mooncake_server:
115121
with RemoteOpenAIServer(model,
116-
server_args,
117-
server_port=port,
118-
env_dict=env_dict,
119-
auto_port=False) as server:
122+
server_args,
123+
server_port=port,
124+
env_dict=env_dict,
125+
auto_port=False) as server:
120126
client = server.get_async_client()
121127
for _ in range(2):
122128
batch = await client.completions.create(

0 commit comments

Comments (0)