From 77b5ed64e300e51ffac96df0cc11e9bf365b1ab5 Mon Sep 17 00:00:00 2001
From: Pamela <179191831+pamelap-nvidia@users.noreply.github.com>
Date: Thu, 4 Jun 2026 23:04:08 +0000
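Subject: [PATCH] Move Spark QA func tests into l0_gb10 and prune QA lists

Move 18 entries from qa/llm_spark_func.yml into test-db/l0_gb10.yml
(Llama-3.1-8B, GPT-OSS and Qwen3-30B-A3B quickstart cases, the
TestLlama3_1_8B and TestQwen3_30B_A3B accuracy cases, and the
Nemotron-3 serve cases), and remove the two test_ptp_quickstart_bert
entries from l0_gb10.yml.

Also remove, without re-adding them elsewhere in this patch:
- 11 entries from qa/llm_spark_func.yml (Phi-4 reasoning quickstart
  cases and the Qwen2.5-VL / Gemma3 multimodal accuracy cases);
- 15 entries from qa/llm_spark_perf.yml (Phi-4, Qwen2.5-VL and Gemma3
  perf cases).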

Signed-off-by: Pamela <179191831+pamelap-nvidia@users.noreply.github.com>
---
 .../test_lists/qa/llm_spark_func.yml          | 29 -------------------
 .../test_lists/qa/llm_spark_perf.yml          | 15 ----------
 .../test_lists/test-db/l0_gb10.yml            | 20 +++++++++++--
 3 files changed, 18 insertions(+), 46 deletions(-)

diff --git a/tests/integration/test_lists/qa/llm_spark_func.yml b/tests/integration/test_lists/qa/llm_spark_func.yml
index 249197b6ca40..dfb8aa7cb1cb 100644
--- a/tests/integration/test_lists/qa/llm_spark_func.yml
+++ b/tests/integration/test_lists/qa/llm_spark_func.yml
@@ -9,11 +9,6 @@ llm_spark_func:
         gte: 1
         lte: 1
   tests:
-  - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
-  - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
-  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
-  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
-  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-fp8-Qwen3/nvidia-Qwen3-8B-FP8]
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8b-nvfp4-Qwen3/nvidia-Qwen3-8B-NVFP4]
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-8B-bf16-Qwen3/Qwen3-8B]
@@ -22,38 +17,14 @@ llm_spark_func:
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-14B-bf16-Qwen3/Qwen3-14B]
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32B-bf16-Qwen3/Qwen3-32B]
   - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-32b-nvfp4-Qwen3/nvidia-Qwen3-32B-NVFP4]
-  - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
-  - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
-  - test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-fp8-nvidia-Phi-4-reasoning-plus-FP8]
-  - test_e2e.py::test_ptp_quickstart_advanced[Phi4-Reasoning-Plus-nvfp4-nvidia-Phi-4-reasoning-plus-NVFP4]
-  - test_e2e.py::test_ptp_quickstart_advanced[Phi-4-reasoning-plus-bf16-Phi-4-reasoning-plus]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP8-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama3.3-70B-FP4-modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4]
   - test_e2e.py::test_ptp_quickstart_advanced[Llama-4-Scout-17B-16E-FP4-llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4]
   - test_e2e.py::test_ptp_quickstart_advanced[DeepSeek-R1-Distill-Qwen-32B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-32B]
   - test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Nano-9B-v2-nvfp4-NVIDIA-Nemotron-Nano-9B-v2-NVFP4]
   - test_e2e.py::test_ptp_quickstart_advanced_eagle3[GPT-OSS-120B-Eagle3-gpt_oss/gpt-oss-120b-gpt_oss/gpt-oss-120b-Eagle3]
-  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
-  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_auto_dtype
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_fp8
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestQwen2_5_VL_7B::test_nvfp4
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_fp8_prequantized
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_27BInstruct::test_nvfp4_prequantized
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_auto_dtype
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_fp8_prequantized
-  - accuracy/test_llm_api_pytorch_multimodal.py::TestGemma3_12BInstruct::test_nvfp4_prequantized
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
-  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
   - test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b]
   - test_e2e.py::test_openai_chat_guided_decoding[meta-llama/Llama-3.1-8B-Instruct]
-  - examples/serve/test_serve.py::test_nemotron3_super_120b_nvfp4
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_on]
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_off]
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_streaming]
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[tool_calling]
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[image]
-  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[video]
 # ===============================================================================
 # 2: Multi-GPU (2 GPUs) Spark func cases with multinode support
 # ===============================================================================
diff --git a/tests/integration/test_lists/qa/llm_spark_perf.yml b/tests/integration/test_lists/qa/llm_spark_perf.yml
index 605466a982f6..514f884cad50 100644
--- a/tests/integration/test_lists/qa/llm_spark_perf.yml
+++ b/tests/integration/test_lists/qa/llm_spark_perf.yml
@@ -40,24 +40,9 @@ llm_spark_perf:
   - perf/test_perf.py::test_perf[qwen3_30b_a3b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_v1.5_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_reasoning_plus_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_reasoning_plus-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[qwen3_32b_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[qwen3_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
   - perf/test_perf.py::test_perf[deepseek_r1_distill_qwen_32b-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp4-bench-pytorch-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_fp8-bench-pytorch-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[qwen2_5_vl_7b_instruct_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_27b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_27b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_27b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_12b_it-bench-pytorch-streaming-bfloat16-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_12b_it_fp8-bench-pytorch-streaming-float8-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
-  - perf/test_perf.py::test_perf[gemma_3_12b_it_fp4-bench-pytorch-streaming-float4-maxbs:1-input_output_len:2048,128-reqs:1-con:1]
 # ===============================================================================
 # 2: Multi-GPU (2 GPUs) Spark perf cases with multinode support
 # ===============================================================================
diff --git a/tests/integration/test_lists/test-db/l0_gb10.yml b/tests/integration/test_lists/test-db/l0_gb10.yml
index efc6a64530f9..9fb460ef3b56 100644
--- a/tests/integration/test_lists/test-db/l0_gb10.yml
+++ b/tests/integration/test_lists/test-db/l0_gb10.yml
@@ -17,9 +17,15 @@ l0_gb10:
   tests:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/attention/test_attention_mla.py
-  - test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
-  - test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[latency]
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_auto_dtype
+  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False]
+  - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True]
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_off]
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_streaming]
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[tool_calling]
+  - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-120B-gpt_oss/gpt-oss-120b]
 - condition:
     ranges:
       system_gpu_count:
@@ -40,3 +46,13 @@ l0_gb10:
   - unittest/_torch/modeling -k "modeling_out_of_tree"
   - unittest/_torch/modules/moe/test_moe_module.py::test_configurable_moe_single_gpu[e8_k1_h512_i512-seq=8-dtype=torch.float16-backend=CUTLASS-quant=NVFP4-routing=Renormalize]
   - unittest/_torch/modules/moe/test_moe_module.py::test_configurable_moe_single_gpu[e8_k1_h512_i512-seq=8-dtype=torch.bfloat16-backend=CUTLASS-quant=NVFP4-routing=Renormalize]
+  - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B]
+  - test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf]
+  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-bf16-instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
+  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP8-llama-3.1-model/Llama-3.1-8B-Instruct-FP8]
+  - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-FP4-modelopt-hf-model-hub/Llama-3.1-8B-Instruct-fp4]
+  - test_e2e.py::test_ptp_quickstart_advanced[GPT-OSS-20B-gpt_oss/gpt-oss-20b]
+  - examples/serve/test_serve.py::test_nemotron3_super_120b_nvfp4
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[text_reasoning_on]
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[image]
+  - examples/serve/test_serve.py::test_nemotron3_nano_omni_nvfp4[video]