diff --git a/Qwen-Qwen3.5-0.8B/builtin/cpu_and_mobile/text.json b/Qwen-Qwen3.5-0.8B/builtin/cpu_and_mobile/text.json index 8d852a78e..94a5d77b7 100644 --- a/Qwen-Qwen3.5-0.8B/builtin/cpu_and_mobile/text.json +++ b/Qwen-Qwen3.5-0.8B/builtin/cpu_and_mobile/text.json @@ -8,9 +8,9 @@ "type": "ModelBuilder", "precision": "int4", "int4_accuracy_level": 4, + "int4_algo_config": "k_quant_linear", "extra_options": { "filename": "text.onnx", - "quant_mode": "default", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-0.8B/builtin/cuda/text.json b/Qwen-Qwen3.5-0.8B/builtin/cuda/text.json index 41a85bc57..f15e47b68 100644 --- a/Qwen-Qwen3.5-0.8B/builtin/cuda/text.json +++ b/Qwen-Qwen3.5-0.8B/builtin/cuda/text.json @@ -8,9 +8,9 @@ "type": "ModelBuilder", "precision": "int4", "int4_accuracy_level": 4, + "int4_algo_config": "k_quant_linear", "extra_options": { "filename": "text.onnx", - "quant_mode": "default", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-0.8B/builtin/webgpu/text.json b/Qwen-Qwen3.5-0.8B/builtin/webgpu/text.json index de121e31b..0be773de4 100644 --- a/Qwen-Qwen3.5-0.8B/builtin/webgpu/text.json +++ b/Qwen-Qwen3.5-0.8B/builtin/webgpu/text.json @@ -8,10 +8,10 @@ "type": "ModelBuilder", "precision": "int4", "int4_accuracy_level": 4, + "int4_algo_config": "k_quant_linear", "extra_options": { "filename": "text.onnx", - "prune_lm_head": true, - "quant_mode": "default" + "prune_lm_head": true } } }, diff --git a/Qwen-Qwen3.5-27B/builtin/cuda/text.json b/Qwen-Qwen3.5-27B/builtin/cuda/text.json index 274251c95..e9a23b2ae 100644 --- a/Qwen-Qwen3.5-27B/builtin/cuda/text.json +++ b/Qwen-Qwen3.5-27B/builtin/cuda/text.json @@ -10,7 +10,6 @@ "int4_accuracy_level": 4, "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-2B/builtin/cpu_and_mobile/text.json b/Qwen-Qwen3.5-2B/builtin/cpu_and_mobile/text.json index 8fb8aecee..7c7b5267f 100644 --- a/Qwen-Qwen3.5-2B/builtin/cpu_and_mobile/text.json +++ b/Qwen-Qwen3.5-2B/builtin/cpu_and_mobile/text.json @@ -12,7 +12,6 @@ "int4_block_size": 128, "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-2B/builtin/webgpu/text.json b/Qwen-Qwen3.5-2B/builtin/webgpu/text.json index a7976f7d6..cb2abf365 100644 --- a/Qwen-Qwen3.5-2B/builtin/webgpu/text.json +++ b/Qwen-Qwen3.5-2B/builtin/webgpu/text.json @@ -12,8 +12,7 @@ "int4_block_size": 32, "extra_options": { "filename": "text.onnx", - "prune_lm_head": true, - "quant_mode": "int4" + "prune_lm_head": true } } }, diff --git a/Qwen-Qwen3.5-4B/builtin/cpu_and_mobile/text.json b/Qwen-Qwen3.5-4B/builtin/cpu_and_mobile/text.json index 54c522bc6..e9b05f42d 100644 --- a/Qwen-Qwen3.5-4B/builtin/cpu_and_mobile/text.json +++ b/Qwen-Qwen3.5-4B/builtin/cpu_and_mobile/text.json @@ -12,7 +12,6 @@ "int4_algo_config": "rtn", "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-4B/builtin/cuda/text.json b/Qwen-Qwen3.5-4B/builtin/cuda/text.json index df7149658..d69267114 100644 --- a/Qwen-Qwen3.5-4B/builtin/cuda/text.json +++ b/Qwen-Qwen3.5-4B/builtin/cuda/text.json @@ -12,7 +12,6 @@ "int4_block_size": 32, "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-4B/builtin/webgpu/text.json b/Qwen-Qwen3.5-4B/builtin/webgpu/text.json index 6e79cba50..0578f27f7 100644 --- a/Qwen-Qwen3.5-4B/builtin/webgpu/text.json +++ b/Qwen-Qwen3.5-4B/builtin/webgpu/text.json @@ -12,8 +12,7 @@ "int4_algo_config": "rtn", "extra_options": { "filename": "text.onnx", - "prune_lm_head": true, - "quant_mode": "int4" + "prune_lm_head": true } } }, diff --git a/Qwen-Qwen3.5-9B/builtin/cpu_and_mobile/text.json b/Qwen-Qwen3.5-9B/builtin/cpu_and_mobile/text.json index 38e2f5a9b..3b7af6c73 100644 --- a/Qwen-Qwen3.5-9B/builtin/cpu_and_mobile/text.json +++ b/Qwen-Qwen3.5-9B/builtin/cpu_and_mobile/text.json @@ -10,7 +10,6 @@ "int4_accuracy_level": 4, "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-9B/builtin/cuda/text.json b/Qwen-Qwen3.5-9B/builtin/cuda/text.json index aac4d8231..a8716de8e 100644 --- a/Qwen-Qwen3.5-9B/builtin/cuda/text.json +++ b/Qwen-Qwen3.5-9B/builtin/cuda/text.json @@ -10,7 +10,6 @@ "int4_accuracy_level": 4, "extra_options": { "filename": "text.onnx", - "quant_mode": "int4", "prune_lm_head": true } }, diff --git a/Qwen-Qwen3.5-9B/builtin/webgpu/text.json b/Qwen-Qwen3.5-9B/builtin/webgpu/text.json index 9f8b94882..38808e3fc 100644 --- a/Qwen-Qwen3.5-9B/builtin/webgpu/text.json +++ b/Qwen-Qwen3.5-9B/builtin/webgpu/text.json @@ -10,8 +10,7 @@ "int4_accuracy_level": 4, "extra_options": { "filename": "text.onnx", - "prune_lm_head": true, - "quant_mode": "int4" + "prune_lm_head": true } } },