microsoft · hanbitmyths · May 31, 2026 · Jun 1, 2026 · Jun 5, 2026
diff --git a/tencent-HY-MT1.5-1.8B/LICENSE b/tencent-HY-MT1.5-1.8B/LICENSE
@@ -0,0 +1,2 @@
+The upstream model is distributed under the Tencent HY Community License Agreement.
+See https://huggingface.co/tencent/HY-MT1.5-1.8B/blob/main/License.txt.
diff --git a/tencent-HY-MT1.5-1.8B/baseline/README.md b/tencent-HY-MT1.5-1.8B/baseline/README.md
@@ -0,0 +1,24 @@
+# tencent-HY-MT1.5-1.8B - Baseline PyTorch Evaluation
+
+This folder contains an Olive recipe for evaluating the Hugging Face PyTorch base model `tencent/HY-MT1.5-1.8B` on WMT18 Chinese-to-English translation with the LM Evaluation Harness (`lm-eval`).
+
+## Setup
+
+```bash
+pip install -r requirements.txt
+```
+
+## Run evaluation
+
+```bash
+olive run --config tencent-HY-MT1.5-1.8B_pytorch_with_eval.json
+```
+
+## Evaluation results
+
+PyTorch baseline WMT18 Chinese-to-English translation metrics, run on GPU (BLEU and chrF are higher-is-better; TER is lower-is-better):
+
+- BLEU: `12.634090637487896`
+- chrF: `35.290981008260474`
+- TER: `85.7224234039956`
+- Samples: `3981`
diff --git a/tencent-HY-MT1.5-1.8B/baseline/info.yaml b/tencent-HY-MT1.5-1.8B/baseline/info.yaml
@@ -0,0 +1,6 @@
+arch: hunyuan_v1_dense
+recipes:
+  - name: tencent-HY-MT1.5-1.8B_pytorch_with_eval
+    file: tencent-HY-MT1.5-1.8B_pytorch_with_eval.json
+    devices: gpu
+    eps: PyTorch
diff --git a/tencent-HY-MT1.5-1.8B/baseline/requirements.txt b/tencent-HY-MT1.5-1.8B/baseline/requirements.txt
@@ -0,0 +1,8 @@
+accelerate
+datasets
+lm-eval
+olive-ai
+sentencepiece
+tiktoken
+torch
+transformers==4.56.1
diff --git a/tencent-HY-MT1.5-1.8B/baseline/tencent-HY-MT1.5-1.8B_pytorch_with_eval.json b/tencent-HY-MT1.5-1.8B/baseline/tencent-HY-MT1.5-1.8B_pytorch_with_eval.json
@@ -0,0 +1,75 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "tencent/HY-MT1.5-1.8B",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "gpu"
+                }
+            ]
+        }
+    },
+    "evaluators": {
+        "zh_en_translation": {
+            "type": "LMEvaluator",
+            "tasks": [
+                {
+                    "task": "wmt18-zh-en",
+                    "dataset_path": "wmt/wmt18",
+                    "dataset_name": "zh-en",
+                    "output_type": "generate_until",
+                    "training_split": "train",
+                    "validation_split": "validation",
+                    "fewshot_split": "validation",
+                    "test_split": "test",
+                    "doc_to_text": "Translate the following segment into English, without additional explanation.\n\n{{translation[\"zh\"]}}\nEnglish translation:",
+                    "doc_to_target": "{{translation[\"en\"]}}",
+                    "metric_list": [
+                        {
+                            "metric": "bleu",
+                            "aggregation": "bleu",
+                            "higher_is_better": true
+                        },
+                        {
+                            "metric": "ter",
+                            "aggregation": "ter",
+                            "higher_is_better": false
+                        },
+                        {
+                            "metric": "chrf",
+                            "aggregation": "chrf",
+                            "higher_is_better": true
+                        }
+                    ],
+                    "generation_kwargs": {
+                        "until": [
+                            "\n"
+                        ],
+                        "do_sample": false,
+                        "temperature": 0.0
+                    },
+                    "repeats": 1,
+                    "metadata": {
+                        "version": 1.0
+                    }
+                }
+            ],
+            "batch_size": 32,
+            "bootstrap_iters": 0,
+            "max_length": 2048,
+            "device": "gpu"
+        }
+    },
+    "evaluator": "zh_en_translation",
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model_pytorch",
+    "cache_dir": "cache_pytorch"
+}
diff --git a/tencent-HY-MT1.5-1.8B/cpu/README.md b/tencent-HY-MT1.5-1.8B/cpu/README.md
@@ -0,0 +1,50 @@
+# tencent-HY-MT1.5-1.8B - CPU Optimization
+
+This folder contains Olive recipes for optimizing `tencent/HY-MT1.5-1.8B` for `CPUExecutionProvider`.
+
+## Recipes
+
+- `tencent-HY-MT1.5-1.8B_cpu_fp32.json`
+- `tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval.json`
+- `tencent-HY-MT1.5-1.8B_cpu_int4.json`
+- `tencent-HY-MT1.5-1.8B_cpu_int4_with_eval.json`
+
+## Setup
+
+```bash
+pip install -r requirements.txt
+```
+
+## Build examples
+
+```bash
+olive run --config tencent-HY-MT1.5-1.8B_cpu_fp32.json
+olive run --config tencent-HY-MT1.5-1.8B_cpu_int4.json
+```
+
+## Build and evaluate with WMT18 Chinese-to-English translation
+
+```bash
+olive run --config tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval.json
+olive run --config tencent-HY-MT1.5-1.8B_cpu_int4_with_eval.json
+```
+
+## Notes
+
+- The HY-MT1.5-1.8B config sets `tie_word_embeddings=true`, so the `TieWordEmbeddings` surgery is applied after `ModelBuilder`.
+- The full-precision recipe for this backend uses `fp32`.
+- The primary INT4 recipe uses full INT4 with `group_size: 32`: GPTQ -> RTN -> ModelBuilder.
+- The primary INT4 recipe omits SelectiveMixedPrecision and quantizes embedding / lm_head to INT4 as well.
+- CPU recipes save ONNX weights to `model.onnx.data` through the final GraphSurgeries pass.
+- Full INT4 with `group_size: 32` was selected because it is smaller than the previous CPU INT4 artifact and still improves WMT18 Chinese-to-English quality over the PyTorch baseline when evaluated with CPU EP.
+
+## Evaluation results
+
+WMT18 Chinese-to-English translation, 3,981 test samples. BLEU and chrF are higher-is-better; TER is lower-is-better. CPU ONNX results were evaluated with ORT GenAI using CPU EP.
+
+| Model | Embedding / lm_head | Size | BLEU | chrF | TER |
+| --- | --- | ---: | ---: | ---: | ---: |
+| PyTorch baseline | fp16 | 3.806125 GiB | 12.634090637487896 | 35.290981008260474 | 85.7224234039956 |
+| Current CPU INT4 recipe (full INT4, `group_size: 32`) | int4 | 1.203313 GiB | 16.920205475631327 | 42.982874679434815 | 80.8764291817553 |
+
+The current INT4 artifact is 68.4% smaller than the PyTorch model; it raises BLEU by 4.29 points and chrF by 7.69 points, and lowers TER (lower is better) by 4.85 points. It is also smaller than the previous CPU INT4 artifact based on SelectiveMixedPrecision `ratio: 0.83` (1.288222 GiB), so the full INT4 `group_size: 32` recipe is the primary CPU INT4 path.
diff --git a/tencent-HY-MT1.5-1.8B/cpu/info.yaml b/tencent-HY-MT1.5-1.8B/cpu/info.yaml
@@ -0,0 +1,18 @@
+arch: hunyuan_v1_dense
+recipes:
+  - name: tencent-HY-MT1.5-1.8B_cpu_fp32
+    file: tencent-HY-MT1.5-1.8B_cpu_fp32.json
+    devices: cpu
+    eps: CPUExecutionProvider
+  - name: tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval
+    file: tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval.json
+    devices: cpu
+    eps: CPUExecutionProvider
+  - name: tencent-HY-MT1.5-1.8B_cpu_int4
+    file: tencent-HY-MT1.5-1.8B_cpu_int4.json
+    devices: cpu
+    eps: CPUExecutionProvider
+  - name: tencent-HY-MT1.5-1.8B_cpu_int4_with_eval
+    file: tencent-HY-MT1.5-1.8B_cpu_int4_with_eval.json
+    devices: cpu
+    eps: CPUExecutionProvider
diff --git a/tencent-HY-MT1.5-1.8B/cpu/requirements.txt b/tencent-HY-MT1.5-1.8B/cpu/requirements.txt
@@ -0,0 +1,9 @@
+accelerate
+datasets
+lm-eval
+olive-ai
+onnxruntime-genai
+sentencepiece
+tiktoken
+torch
+transformers==4.56.1
diff --git a/tencent-HY-MT1.5-1.8B/cpu/tencent-HY-MT1.5-1.8B_cpu_fp32.json b/tencent-HY-MT1.5-1.8B/cpu/tencent-HY-MT1.5-1.8B_cpu_fp32.json
@@ -0,0 +1,44 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "tencent/HY-MT1.5-1.8B",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "cpu",
+                    "execution_providers": [
+                        "CPUExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "fp32"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ],
+            "save_as_external_data": true,
+            "all_tensors_to_one_file": true,
+            "external_data_name": "model.onnx.data"
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model_cpu_fp32",
+    "cache_dir": "cache_cpu_fp32",
+    "no_artifacts": true
+}
diff --git a/tencent-HY-MT1.5-1.8B/cpu/tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval.json b/tencent-HY-MT1.5-1.8B/cpu/tencent-HY-MT1.5-1.8B_cpu_fp32_with_eval.json
@@ -0,0 +1,98 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "tencent/HY-MT1.5-1.8B",
+        "load_kwargs": {
+            "torch_dtype": "float16"
+        }
+    },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [
+                {
+                    "device": "cpu",
+                    "execution_providers": [
+                        "CPUExecutionProvider"
+                    ]
+                }
+            ]
+        }
+    },
+    "passes": {
+        "m": {
+            "type": "ModelBuilder",
+            "precision": "fp32"
+        },
+        "t": {
+            "type": "GraphSurgeries",
+            "surgeries": [
+                {
+                    "surgeon": "TieWordEmbeddings"
+                }
+            ],
+            "save_as_external_data": true,
+            "all_tensors_to_one_file": true,
+            "external_data_name": "model.onnx.data"
+        }
+    },
+    "target": "local_system",
+    "log_severity_level": 0,
+    "output_dir": "model_cpu_fp32",
+    "cache_dir": "cache_cpu_fp32",
+    "evaluators": {
+        "zh_en_translation": {
+            "type": "LMEvaluator",
+            "tasks": [
+                {
+                    "task": "wmt18-zh-en",
+                    "dataset_path": "wmt/wmt18",
+                    "dataset_name": "zh-en",
+                    "output_type": "generate_until",
+                    "training_split": "train",
+                    "validation_split": "validation",
+                    "fewshot_split": "validation",
+                    "test_split": "test",
+                    "doc_to_text": "Translate the following segment into English, without additional explanation.\n\n{{translation[\"zh\"]}}\nEnglish translation:",
+                    "doc_to_target": "{{translation[\"en\"]}}",
+                    "metric_list": [
+                        {
+                            "metric": "bleu",
+                            "aggregation": "bleu",
+                            "higher_is_better": true
+                        },
+                        {
+                            "metric": "ter",
+                            "aggregation": "ter",
+                            "higher_is_better": false
+                        },
+                        {
+                            "metric": "chrf",
+                            "aggregation": "chrf",
+                            "higher_is_better": true
+                        }
+                    ],
+                    "generation_kwargs": {
+                        "until": [
+                            "\n"
+                        ],
+                        "do_sample": false,
+                        "temperature": 0.0
+                    },
+                    "repeats": 1,
+                    "metadata": {
+                        "version": 1.0
+                    }
+                }
+            ],
+            "batch_size": 1,
+            "bootstrap_iters": 0,
+            "max_length": 2048,
+            "provider_options": {
+                "enable_skip_layer_norm_strict_mode": "1"
+            }
+        }
+    },
+    "evaluator": "zh_en_translation",
+    "evaluate_input_model": false
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		The upstream model is distributed under the Tencent HY Community License Agreement.
		See https://huggingface.co/tencent/HY-MT1.5-1.8B/blob/main/License.txt.