diff --git a/.github/benchmark/sglang_benchmark_models.json b/.github/benchmark/sglang_benchmark_models.json index 0cb8c1b455..31a9e45aab 100644 --- a/.github/benchmark/sglang_benchmark_models.json +++ b/.github/benchmark/sglang_benchmark_models.json @@ -955,6 +955,7 @@ "prefix": "qwen3-32b-fp8-tp1-mi308", "extra_args": [ "--tensor-parallel-size 1", + "--page-size 16", "qwen_reasoning" ], "bench_args": "", @@ -971,6 +972,7 @@ "prefix": "qwen3-32b-fp8-tp8-mi308", "extra_args": [ "--tensor-parallel-size 8", + "--page-size 16", "qwen_reasoning" ], "bench_args": "", diff --git a/atom/plugin/sglang/runtime/model_arch.py b/atom/plugin/sglang/runtime/model_arch.py index cd791fc485..ac2d7de21b 100644 --- a/atom/plugin/sglang/runtime/model_arch.py +++ b/atom/plugin/sglang/runtime/model_arch.py @@ -86,6 +86,7 @@ def _install_deepseek_v4_adapters(model: Any) -> None: prepare_config=_prepare_kimi_k25_config, install_adapters=_install_deepseek_mla_adapters, ), + "Qwen3ForCausalLM": SGLangModelAdapterSpec(), "Qwen3MoeForCausalLM": SGLangModelAdapterSpec(), "Qwen3NextForCausalLM": SGLangModelAdapterSpec( wrapper_binds_gdn_context=True, @@ -114,6 +115,7 @@ def _install_deepseek_v4_adapters(model: Any) -> None: "DeepseekV3ForCausalLM", "DeepseekV32ForCausalLM", "GlmMoeDsaForCausalLM", + "Qwen3ForCausalLM", "Qwen3MoeForCausalLM", "Qwen3NextForCausalLM", "MiniMaxM2ForCausalLM",