diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index fdc430364..c7548f69d 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,6 +1,6 @@ { "configCheck": 173, - "copyCheck": 182, + "copyCheck": 181, "executePatchPyCheck": 0, "executeRuntimeCheck": 106, "extensionCheck": 2, @@ -14,6 +14,6 @@ "pathCheck": 1463, "requirementsCheck": 44, "templateCheck": 3, - "venvRequirementsCheck": 22, + "venvRequirementsCheck": 23, "winmlCopyCheck": 38 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index a207d12d8..faeb9a88f 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -18,7 +18,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "microsoft-Phi-3.5-mini-instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -69,7 +69,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -193,7 +193,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "meta-llama-Llama-3.2-1B-Instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -269,7 +269,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk", - "version": 7, + "version": 8, "p0": true, "pipeline_tags": [ "text-generation" @@ -522,7 +522,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "meta-llama-Llama-3.1-8B-Instruct/aitk", - "version": 5, + "version": 6, "p0": false, "pipeline_tags": [ "text-generation" @@ -1049,7 +1049,8 @@ "bfloat16": "a:bf16" }, "QuarkWeightType": { - "w_uint4_per_group_asym": "w:int4" + "w_uint4_per_group_asym": "w:uint4", + "uint4_wo_128": "w:uint4" } } } diff --git a/.aitk/requirements/AMD/Quark_py3.12.13.txt b/.aitk/requirements/AMD/Quark_py3.12.13.txt new file mode 100644 index 000000000..2c804e548 --- /dev/null +++ b/.aitk/requirements/AMD/Quark_py3.12.13.txt @@ -0,0 +1,107 @@ +--extra-index-url=https://download.pytorch.org/whl/cu128 +--extra-index-url=https://pypi.amd.com/olive/1.7.1-5D/simple +--extra-index-url=https://pypi.amd.com/simple +accelerate==1.13.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.13.5 +aiosignal==1.4.0 +alembic==1.18.4 +amd-quark==0.11 +annotated-doc==0.0.4 +annotated-types==0.7.0 +anyio==4.13.0 +attrs==26.1.0 +certifi==2026.4.22 +charset-normalizer==3.4.7 +click==8.4.0 +colorama==0.4.6 +colorlog==6.10.1 +datasets==4.8.5 +dill==0.4.1 +evaluate==0.4.6 +filelock==3.29.0 +flatbuffers==25.12.19 +frozenlist==1.8.0 +fsspec==2026.2.0 +greenlet==3.5.0 +h11==0.16.0 +hf-xet==1.5.0 +httpcore==1.0.9 +httpx==0.28.1 +huggingface-hub==0.36.2 +idna==3.15 +importlib-metadata==8.7.1 +jinja2==3.1.6 +joblib==1.5.3 +lightning-utilities==0.15.3 +mako==1.3.12 +markdown-it-py==4.2.0 +markupsafe==3.0.3 +mdurl==0.1.2 +ml-dtypes==0.5.4 +model-generate==1.7.1 +mpmath==1.3.0 +multidict==6.7.1 +multiprocess==0.70.19 +narwhals==2.21.2 +networkx==3.6.1 +ninja==1.13.0 +nltk==3.9.4 +numpy==1.26.4 +olive-ai==0.12.1 +onnx==1.18.0 +onnx-ir==0.2.1 +onnx-tool==1.0.1 +onnxruntime==1.26.0 +onnxruntime-genai==0.13.2 +onnxscript==0.7.0 +onnxsim==0.6.3 +onnxslim==0.1.93 +opentelemetry-api==1.41.1 +opentelemetry-sdk==1.41.1 +opentelemetry-semantic-conventions==0.62b1 +optimum==2.1.0 +optuna==4.8.0 +packaging==26.2 +pandas==3.0.3 +plotly==6.7.0 +prompt-toolkit==3.0.52 +propcache==0.5.2 +protobuf==7.34.1 +psutil==7.2.2 +pyarrow==23.0.1 +pydantic==2.13.4 +pydantic-core==2.46.4 +pygments==2.20.0 +python-dateutil==2.9.0.post0 +pyyaml==6.0.3 +questionary==2.1.1 +regex==2026.5.9 +requests==2.34.2 +rich==15.0.0 +ryzenai-dynamic-dispatch==1.7.1 +ryzenai-onnx-utils==1.7.1 +safetensors==0.7.0 +scipy==1.17.1 +sentencepiece==0.2.1 +setuptools==81.0.0 +shellingham==1.5.4 +six==1.17.0 +sqlalchemy==2.0.49 +sympy==1.14.0 +tabulate==0.10.0 +tokenizers==0.22.2 +torch==2.7.1+cu128 +torchmetrics==1.9.0 +tqdm==4.67.3 +transformers==4.57.6 +typer==0.25.1 +typing-extensions==4.15.0 +typing-inspection==0.4.2 +tzdata==2026.2 +urllib3==2.7.0 +wcwidth==0.7.0 +xxhash==3.7.0 +yarl==1.23.0 +zipp==4.1.0 +zstandard==0.25.0 diff --git a/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 1c98763a9..1a95be2ae 100644 --- a/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index af6ec72c3..caf4dafbb 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "qwen2_5_ov_gpu_config.json" devices: @@ -41,7 +41,7 @@ recipes: aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" - version: 7 + version: 8 groupId: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" groupItemName: "1.5B" p0: true diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index fdd9f88f7..ea852ef70 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "version": 7 + "version": 8 } } diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json index bbd0de148..5ea92228f 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 5a64cbcb6..a608a4515 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index cba12f351..3ff255173 100644 --- a/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 1a31c0a53..27e12328f 100644 --- a/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-0.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index 6cbffe8a5..90a5871cb 100644 --- a/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config b/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config index cb398a3ed..6d6107325 100644 --- a/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config +++ b/Qwen-Qwen2.5-Coder-7B-Instruct/aitk/qwen2_5_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json index ff6dffe6b..d7f7d1907 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json @@ -20,36 +20,19 @@ ], "passes": { "qq": { - "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "type": "QuarkQuantization", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config index feada2a93..2070828ca 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 9486b13f9..5400e5bdf 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/DeepSeek-R1-Distill-Qwen-1.5B_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "deepseek_ov_gpu_config.json" devices: @@ -41,7 +41,7 @@ recipes: aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - version: 7 + version: 8 groupId: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" groupItemName: "1.5B" p0: true diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index 64ea7551e..fd3da91fe 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "version": 7 + "version": 8 } } diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config index 968721c07..c5ab868d8 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config index 9c433fa10..c78d3191d 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config @@ -1,19 +1,5 @@ { "copies": [ - { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json", - "dst": "llama3_1_vitis_ai_config.json", - "replacements": [ - { - "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "replace": "meta-llama/Llama-3.1-8B-Instruct" - }, - { - "find": "model/deepseek", - "replace": "model/llama3_1" - } - ] - }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "llama3_1_dml_config.json", diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml b/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml index ff6ed17d6..0ffd84c03 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/info.yml @@ -19,7 +19,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Llama-3.1-8B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU isGPUSuggested: true - file: "llama3_1_ov_config.json" @@ -42,5 +42,5 @@ recipes: aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.1-8B-Instruct" - version: 5 + version: 6 p0: false diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json index 2cbfe5bf2..aa140b928 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config index ed2ced33a..a7dec8351 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config index f3d61ff91..27c43762a 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/meta-llama/Llama-3.1-8B-Instruct", - "version": 5 + "version": 6 } } diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index 803ac4a12..b9f8aff5c 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Llama-3.2-1B-Instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "llama3_2_ov_config.json" devices: @@ -41,5 +41,5 @@ recipes: aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" - version: 7 + version: 8 p0: true diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json index 38dceba2b..11cf511ae 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ], - "inp": "self_attn.q_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_proj", "mlp.up_proj" ], - "inp": "mlp.gate_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config index fa4b549b8..762473e3d 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index 3df076bbb..769ec19e2 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/meta-llama/Llama-3.2-1B-Instruct", - "version": 7 + "version": 8 } } diff --git a/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config b/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config index 8ca32fdef..f78b6ae10 100644 --- a/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config +++ b/microsoft-Phi-3-mini-128k-instruct/aitk/phi3_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config b/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config index 5c6254fb2..ba2193358 100644 --- a/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config +++ b/microsoft-Phi-3-mini-4k-instruct/aitk/phi3_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 1f85b22d8..e7fd3e325 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -12,7 +12,7 @@ recipes: ep: VitisAIExecutionProvider aitk: oliveFile: "VitisAI/Phi-3.5-mini-instruct_quark_vitisai_llm.json" - requirements: AMD/Quark_py3.10.17 + requirements: AMD/Quark_py3.12.13 evalRuntime: AMDNPU - file: "phi3_5_ov_gpu_config.json" devices: @@ -41,5 +41,5 @@ recipes: aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" - version: 7 + version: 8 p0: true diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index 1162f8288..409edc015 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -31,6 +31,6 @@ ], "modelInfo": { "id": "huggingface/microsoft/Phi-3.5-mini-instruct", - "version": 7 + "version": 8 } } diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json index 741a8c026..edf53d12e 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json @@ -21,35 +21,18 @@ "passes": { "qq": { "type": "QuarkQuantization", - "quant_scheme": "w_uint4_per_group_asym", + "quant_scheme": "uint4_wo_128", "quant_algo": "awq", "dataset": "pileval_for_awq_benchmark", "data_type": "bfloat16", "num_calib_data": 128, - "model_export": [ "hf_format" ], - "exclude_layers": [ ], - "quant_config": { - "name": "awq", - "scaling_layers": [ - { - "prev_op": "input_layernorm", - "layers": [ "self_attn.qkv_proj" ], - "inp": "self_attn.qkv_proj", - "module2inspect": "self_attn" - }, - { "prev_op": "self_attn.qkv_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" }, - { - "prev_op": "post_attention_layernorm", - "layers": [ "mlp.gate_up_proj" ], - "inp": "mlp.gate_up_proj", - "module2inspect": "mlp" - }, - { "prev_op": "mlp.gate_up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" } - ], - "model_decoder_layers": "model.layers" - } + "model_export": ["hf_format"], + "exclude_layers": [] }, - "mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false } + "mg": { + "type": "VitisGenerateModelLLM", + "recipe": "full_fusion" + } }, "target": "local_system", "log_severity_level": 1, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config index c3cede358..2ca775433 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json.config @@ -11,7 +11,7 @@ "isGPUSuggested": true, "runtimeOverwrite": { "autoGenerated": true, - "executeRequirement": "AMD/Quark_py3.10.17" + "executeRequirement": "AMD/Quark_py3.12.13" }, "epMinVersions": { "VitisAIExecutionProvider": "1.8.50" @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config b/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config index 4c7b42a26..8e9019939 100644 --- a/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config +++ b/microsoft-Phi-4-mini-instruct/aitk/phi4_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config b/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config index 547ccda61..62e83bd2e 100644 --- a/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config +++ b/microsoft-Phi-4-mini-reasoning/aitk/phi4_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true, diff --git a/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config b/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config index bf876512b..544070a34 100644 --- a/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config +++ b/mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_vitis_ai_config.json.config @@ -39,7 +39,7 @@ "name": "QuarkDataType" } ], - "optimizationDefault": "w:int4 a:bf16", + "optimizationDefault": "w:uint4 a:bf16", "sections": [ { "autoGenerated": true,