Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/benchmark/oot_models_accuracy.json
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@
"model_path": "deepseek-ai/DeepSeek-R1-0528",
"extraArgs": "--data-parallel-size 8 --enable-expert-parallel",
"env_vars": "MORI_SHMEM_MODE=ISOLATION",
"runner": "linux-atom-mi35x-8",
"runner": "atom-plugin-acc-validation-runner",
Comment thread
junyyang-amd marked this conversation as resolved.
"test_level": "nightly",
"accuracy_threshold": 0.93,
"accuracy_baseline": 0.93,
Expand Down
28 changes: 28 additions & 0 deletions .github/benchmark/sglang_models_accuracy.json
Original file line number Diff line number Diff line change
Expand Up @@ -326,5 +326,33 @@
"accuracy_baseline": null,
"accuracy_baseline_model": "Qwen/Qwen3-32B-FP8",
"_baseline_note": "Threshold placeholder until MI308 gsm8k baseline is measured in CI."
},
{
"model_name": "MI308 GLM-5.1-FP8 TP8",
"model_path": "zai-org/GLM-5.1-FP8",
"extraArgs": "--trust-remote-code --tensor-parallel-size 8 --attention-backend aiter --kv-cache-dtype fp8_e4m3 --mem-fraction-static 0.8 --page-size 1 --disable-radix-cache",
"env_vars": "SGLANG_AITER_FP8_PREFILL_ATTN=0\nSGLANG_USE_AITER=1\nATOM_ENABLE_DS_QKNORM_QUANT_FUSION=1\nSGLANG_EXTERNAL_MODEL_PACKAGE=atom.plugin.sglang.models\nSGLANG_ENABLE_TORCH_COMPILE=1\nTORCHINDUCTOR_COMPILE_THREADS=128",
"runner": "atom-mi308-8gpu-plugins-benchmark",
"test_level": "nightly",
"lm_eval_num_fewshot": 20,
"lm_eval_num_concurrent": 64,
"accuracy_threshold": 0.93,
"accuracy_baseline": null,
"accuracy_baseline_model": "zai-org/GLM-5.1-FP8",
"_baseline_note": "Threshold aligned with the SGLANG accuracy validation workflow target for GLM-5.1-FP8 TP8 on MI308."
},
{
"model_name": "MI308 DeepSeek-V4-Flash",
"model_path": "deepseek-ai/DeepSeek-V4-Flash",
"extraArgs": "--trust-remote-code --tensor-parallel-size 8 --kv-cache-dtype fp8_e4m3 --mem-fraction-static 0.9 --swa-full-tokens-ratio 0.1 --max-running-requests 256 --page-size 256 --disable-radix-cache --disable-shared-experts-fusion --tool-call-parser deepseekv4 --reasoning-parser deepseek-v4",
"env_vars": "SGLANG_DEFAULT_SERVER_ARGS=\nAITER_BF16_FP8_MOE_BOUND=0\nATOM_MOE_GU_ITLV=1\nSGLANG_DEFAULT_THINKING=1\nSGLANG_DSV4_REASONING_EFFORT=max\nSGLANG_USE_AITER=1\nSGLANG_EXTERNAL_MODEL_PACKAGE=atom.plugin.sglang.models\nTORCHINDUCTOR_COMPILE_THREADS=128",
"runner": "atom-mi308-8gpu-plugins-benchmark",
"test_level": "nightly",
"lm_eval_num_fewshot": 5,
"lm_eval_num_concurrent": 8,
"accuracy_threshold": 0.94,
"accuracy_baseline": null,
"accuracy_baseline_model": "deepseek-ai/DeepSeek-V4-Flash",
"_baseline_note": "MI308 SGLang DeepSeek-V4-Flash coverage follows the SGLang DeepSeek-V4 5-shot GSM8K setup; refresh baseline after nightly measurements land."
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,8 @@ jobs:
MAX_WAIT_RETRIES: "120"
STREAM_SGLANG_LOGS: "1"
LM_EVAL_TASK: "gsm8k"
LM_EVAL_NUM_FEWSHOT: ${{ matrix.lm_eval_num_fewshot }}
LM_EVAL_NUM_CONCURRENT: ${{ matrix.lm_eval_num_concurrent }}
run: |
$CONTAINER_ENGINE exec \
-e SGLANG_MODEL_NAME="${SGLANG_MODEL_NAME}" \
Expand All @@ -363,6 +365,8 @@ jobs:
-e MAX_WAIT_RETRIES="${MAX_WAIT_RETRIES}" \
-e STREAM_SGLANG_LOGS="${STREAM_SGLANG_LOGS}" \
-e LM_EVAL_TASK="${LM_EVAL_TASK}" \
-e LM_EVAL_NUM_FEWSHOT="${LM_EVAL_NUM_FEWSHOT}" \
-e LM_EVAL_NUM_CONCURRENT="${LM_EVAL_NUM_CONCURRENT}" \
"$CONTAINER_NAME" bash -lc "
set -euo pipefail
bash .github/scripts/atom_sglang_test.sh accuracy
Expand Down
22 changes: 21 additions & 1 deletion .github/workflows/atom-sglang-accuracy-validation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ on:
type: boolean
default: false
run_mi308_all:
description: "Run all MI308 gsm8k accuracy cases (Qwen on atom-mi308-8gpu-plugins-benchmark)"
description: "Run all MI308 gsm8k accuracy cases on atom-mi308-8gpu-plugins-benchmark"
required: false
type: boolean
default: false
Expand Down Expand Up @@ -448,6 +448,26 @@ jobs:
"env_vars": "SGLANG_DEFAULT_SERVER_ARGS=\nSGLANG_EXTERNAL_MODEL_PACKAGE=atom.plugin.sglang.models\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=0",
"runner": "atom-mi308-8gpu-plugins-benchmark",
},
{
"model_name": "MI308 GLM-5.1-FP8 TP8",
"model_path": "zai-org/GLM-5.1-FP8",
"extra_args": "--trust-remote-code --tensor-parallel-size 8 --attention-backend aiter --kv-cache-dtype fp8_e4m3 --mem-fraction-static 0.8 --page-size 1 --disable-radix-cache",
"lm_eval_num_fewshot": 20,
"lm_eval_num_concurrent": 64,
"accuracy_test_threshold": 0.93,
"env_vars": "SGLANG_AITER_FP8_PREFILL_ATTN=0\nSGLANG_USE_AITER=1\nATOM_ENABLE_DS_QKNORM_QUANT_FUSION=1\nSGLANG_EXTERNAL_MODEL_PACKAGE=atom.plugin.sglang.models\nSGLANG_ENABLE_TORCH_COMPILE=1\nTORCHINDUCTOR_COMPILE_THREADS=128",
"runner": "atom-mi308-8gpu-plugins-benchmark",
},
{
"model_name": "MI308 DeepSeek-V4-Flash",
"model_path": "deepseek-ai/DeepSeek-V4-Flash",
"extra_args": "--trust-remote-code --tensor-parallel-size 8 --kv-cache-dtype fp8_e4m3 --mem-fraction-static 0.9 --swa-full-tokens-ratio 0.1 --max-running-requests 256 --page-size 256 --disable-radix-cache --disable-shared-experts-fusion --tool-call-parser deepseekv4 --reasoning-parser deepseek-v4",
"lm_eval_num_fewshot": 5,
"lm_eval_num_concurrent": 8,
"accuracy_test_threshold": 0.94,
"env_vars": "SGLANG_DEFAULT_SERVER_ARGS=\nAITER_BF16_FP8_MOE_BOUND=0\nATOM_MOE_GU_ITLV=1\nSGLANG_DEFAULT_THINKING=1\nSGLANG_DSV4_REASONING_EFFORT=max\nSGLANG_USE_AITER=1\nSGLANG_EXTERNAL_MODEL_PACKAGE=atom.plugin.sglang.models\nTORCHINDUCTOR_COMPILE_THREADS=128",
"runner": "atom-mi308-8gpu-plugins-benchmark",
},
]
models.extend(mi308_models)

Expand Down
Loading