Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .aitk/configs/checks.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"configCheck": 171,
"copyCheck": 182,
"copyCheck": 181,
"executeRuntimeCheck": 104,
"extensionCheck": 2,
"gitignoreCheck": 44,
Expand All @@ -13,6 +13,6 @@
"pathCheck": 1455,
"requirementsCheck": 37,
"templateCheck": 3,
"venvRequirementsCheck": 22,
"venvRequirementsCheck": 23,
"winmlCopyCheck": 38
}
13 changes: 7 additions & 6 deletions .aitk/configs/model_list.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "microsoft-Phi-3.5-mini-instruct/aitk",
"version": 7,
"version": 8,
"p0": true,
"pipeline_tags": [
"text-generation"
Expand Down Expand Up @@ -69,7 +69,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk",
"version": 7,
"version": 8,
"p0": true,
"pipeline_tags": [
"text-generation"
Expand Down Expand Up @@ -193,7 +193,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "meta-llama-Llama-3.2-1B-Instruct/aitk",
"version": 7,
"version": 8,
"p0": true,
"pipeline_tags": [
"text-generation"
Expand Down Expand Up @@ -269,7 +269,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk",
"version": 7,
"version": 8,
"p0": true,
"pipeline_tags": [
"text-generation"
Expand Down Expand Up @@ -520,7 +520,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "meta-llama-Llama-3.1-8B-Instruct/aitk",
"version": 5,
"version": 6,
"p0": false,
"pipeline_tags": [
"text-generation"
Expand Down Expand Up @@ -1047,7 +1047,8 @@
"bfloat16": "a:bf16"
},
"QuarkWeightType": {
"w_uint4_per_group_asym": "w:int4"
"w_uint4_per_group_asym": "w:uint4",
"uint4_wo_128": "w:uint4"
}
}
}
107 changes: 107 additions & 0 deletions .aitk/requirements/AMD/Quark_py3.12.13.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
--extra-index-url=https://download.pytorch.org/whl/cu128
--extra-index-url=https://pypi.amd.com/olive/1.7.1-5D/simple
--extra-index-url=https://pypi.amd.com/simple
accelerate==1.13.0
aiohappyeyeballs==2.6.1
aiohttp==3.13.5
aiosignal==1.4.0
alembic==1.18.4
amd-quark==0.11
annotated-doc==0.0.4
annotated-types==0.7.0
anyio==4.13.0
attrs==26.1.0
certifi==2026.4.22
charset-normalizer==3.4.7
click==8.4.0
colorama==0.4.6
colorlog==6.10.1
datasets==4.8.5
dill==0.4.1
evaluate==0.4.6
filelock==3.29.0
flatbuffers==25.12.19
frozenlist==1.8.0
fsspec==2026.2.0
greenlet==3.5.0
h11==0.16.0
hf-xet==1.5.0
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.36.2
idna==3.15
importlib-metadata==8.7.1
jinja2==3.1.6
joblib==1.5.3
lightning-utilities==0.15.3
mako==1.3.12
markdown-it-py==4.2.0
markupsafe==3.0.3
mdurl==0.1.2
ml-dtypes==0.5.4
model-generate==1.7.1
mpmath==1.3.0
multidict==6.7.1
multiprocess==0.70.19
narwhals==2.21.2
networkx==3.6.1
ninja==1.13.0
nltk==3.9.4
numpy==1.26.4
olive-ai==0.12.1
onnx==1.18.0
onnx-ir==0.2.1
onnx-tool==1.0.1
onnxruntime==1.26.0
onnxruntime-genai==0.13.2
onnxscript==0.7.0
onnxsim==0.6.3
onnxslim==0.1.93
opentelemetry-api==1.41.1
opentelemetry-sdk==1.41.1
opentelemetry-semantic-conventions==0.62b1
optimum==2.1.0
optuna==4.8.0
packaging==26.2
pandas==3.0.3
plotly==6.7.0
prompt-toolkit==3.0.52
propcache==0.5.2
protobuf==7.34.1
psutil==7.2.2
pyarrow==24.0.0
pydantic==2.13.4
pydantic-core==2.46.4
pygments==2.20.0
python-dateutil==2.9.0.post0
pyyaml==6.0.3
questionary==2.1.1
regex==2026.5.9
requests==2.34.2
rich==15.0.0
ryzenai-dynamic-dispatch==1.7.1
ryzenai-onnx-utils==1.7.1
safetensors==0.7.0
scipy==1.17.1
sentencepiece==0.2.1
setuptools==81.0.0
shellingham==1.5.4
six==1.17.0
sqlalchemy==2.0.49
sympy==1.14.0
tabulate==0.10.0
tokenizers==0.22.2
torch==2.7.1+cu128
torchmetrics==1.9.0
tqdm==4.67.3
transformers==4.57.6
typer==0.25.1
typing-extensions==4.15.0
typing-inspection==0.4.2
tzdata==2026.2
urllib3==2.7.0
wcwidth==0.7.0
xxhash==3.7.0
yarl==1.23.0
zipp==4.1.0
zstandard==0.25.0
7 changes: 7 additions & 0 deletions .aitk/requirements/AMD/sitecustomize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
try:
import pyarrow # noqa
Comment thread
github-code-quality[bot] marked this conversation as resolved.
Fixed
import pyarrow.dataset # noqa
Comment thread
github-code-quality[bot] marked this conversation as resolved.
Fixed
import pyarrow.compute # noqa
Comment thread
github-code-quality[bot] marked this conversation as resolved.
Fixed
except Exception:
Comment thread
github-code-quality[bot] marked this conversation as resolved.
Fixed
pass

Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
4 changes: 2 additions & 2 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ recipes:
ep: VitisAIExecutionProvider
aitk:
oliveFile: "VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json"
requirements: AMD/Quark_py3.10.17
requirements: AMD/Quark_py3.12.13
evalRuntime: AMDNPU
- file: "qwen2_5_ov_gpu_config.json"
devices:
Expand Down Expand Up @@ -41,7 +41,7 @@ recipes:
aitk:
modelInfo:
id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
version: 7
version: 8
groupId: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
groupItemName: "1.5B"
p0: true
2 changes: 1 addition & 1 deletion Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
],
"modelInfo": {
"id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct",
"version": 7
"version": 8
}
}
31 changes: 7 additions & 24 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,18 @@
"passes": {
"qq": {
"type": "QuarkQuantization",
"quant_scheme": "w_uint4_per_group_asym",
"quant_scheme": "uint4_wo_128",
"quant_algo": "awq",
"dataset": "pileval_for_awq_benchmark",
"data_type": "bfloat16",
"num_calib_data": 128,
"model_export": [ "hf_format" ],
"exclude_layers": [ ],
"quant_config": {
"name": "awq",
"scaling_layers": [
{
"prev_op": "input_layernorm",
"layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ],
"inp": "self_attn.q_proj",
"module2inspect": "self_attn"
},
{ "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" },
{
"prev_op": "post_attention_layernorm",
"layers": [ "mlp.gate_proj", "mlp.up_proj" ],
"inp": "mlp.gate_proj",
"module2inspect": "mlp"
},
{ "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" }
],
"model_decoder_layers": "model.layers"
}
"model_export": ["hf_format"],
"exclude_layers": []
},
"mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false }
"mg": {
"type": "VitisGenerateModelLLM",
"recipe": "full_fusion"
}
},
"target": "local_system",
"log_severity_level": 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"isGPUSuggested": true,
"runtimeOverwrite": {
"autoGenerated": true,
"executeRequirement": "AMD/Quark_py3.10.17"
"executeRequirement": "AMD/Quark_py3.12.13"
},
"epMinVersions": {
"VitisAIExecutionProvider": "1.8.50"
Expand Down Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,19 @@
],
"passes": {
"qq": {
"type": "QuarkQuantization",
"quant_scheme": "w_uint4_per_group_asym",
"type": "QuarkQuantization",
"quant_scheme": "uint4_wo_128",
"quant_algo": "awq",
"dataset": "pileval_for_awq_benchmark",
"data_type": "bfloat16",
"num_calib_data": 128,
"model_export": [ "hf_format" ],
"exclude_layers": [ ],
"quant_config": {
"name": "awq",
"scaling_layers": [
{
"prev_op": "input_layernorm",
"layers": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj" ],
"inp": "self_attn.q_proj",
"module2inspect": "self_attn"
},
{ "prev_op": "self_attn.v_proj", "layers": [ "self_attn.o_proj" ], "inp": "self_attn.o_proj" },
{
"prev_op": "post_attention_layernorm",
"layers": [ "mlp.gate_proj", "mlp.up_proj" ],
"inp": "mlp.gate_proj",
"module2inspect": "mlp"
},
{ "prev_op": "mlp.up_proj", "layers": [ "mlp.down_proj" ], "inp": "mlp.down_proj" }
],
"model_decoder_layers": "model.layers"
}
"model_export": ["hf_format"],
"exclude_layers": []
},
"mg": { "type": "VitisGenerateModelLLM", "packed_const": false, "cpu_only": false }
"mg": {
"type": "VitisGenerateModelLLM",
"recipe": "full_fusion"
}
},
"target": "local_system",
"log_severity_level": 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"isGPUSuggested": true,
"runtimeOverwrite": {
"autoGenerated": true,
"executeRequirement": "AMD/Quark_py3.10.17"
"executeRequirement": "AMD/Quark_py3.12.13"
},
"epMinVersions": {
"VitisAIExecutionProvider": "1.8.50"
Expand Down Expand Up @@ -39,7 +39,7 @@
"name": "QuarkDataType"
}
],
"optimizationDefault": "w:int4 a:bf16",
"optimizationDefault": "w:uint4 a:bf16",
"sections": [
{
"autoGenerated": true,
Expand Down
4 changes: 2 additions & 2 deletions deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ recipes:
ep: VitisAIExecutionProvider
aitk:
oliveFile: "VitisAI/DeepSeek-R1-Distill-Qwen-1.5B_quark_vitisai_llm.json"
requirements: AMD/Quark_py3.10.17
requirements: AMD/Quark_py3.12.13
evalRuntime: AMDNPU
- file: "deepseek_ov_gpu_config.json"
devices:
Expand Down Expand Up @@ -41,7 +41,7 @@ recipes:
aitk:
modelInfo:
id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
version: 7
version: 8
groupId: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
groupItemName: "1.5B"
p0: true
Loading
Loading