diff --git a/.gitignore b/.gitignore index ab865aa23..e2048a33c 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ build/ htmlcov/ .coverage* coverage.xml +.test-venv/ diff --git a/python/examples/orca/README.rst b/python/examples/orca/README.rst new file mode 100644 index 000000000..faa589f05 --- /dev/null +++ b/python/examples/orca/README.rst @@ -0,0 +1,233 @@ +ORCA external optimizer example +================================ + +This directory contains wrappers that let `ORCA`_ drive geometry optimization +(and other workflows using the external-tool interface) with a metatomic +machine-learning potential. + +The wrappers implement the file protocol documented in `orca-external-tools`_. +On each ORCA step they read ``*.extinp.tmp`` and the accompanying XYZ geometry, +evaluate energy and gradient with :py:class:`metatomic_ase.MetatomicCalculator`, +and write ``*.engrad`` back for ORCA. + +.. _ORCA: https://www.faccts.de/orca/ +.. _orca-external-tools: https://github.com/faccts/orca-external-tools#interface + +Prerequisites +------------- + +- ORCA 6 or newer (``ProgExt`` / ``Ext_Params`` in the input file) +- Python packages ``metatomic``, ``metatomic-ase``, and their dependencies +- An exported metatomic model (``.pt``), plus an ``extensions/`` directory if the + model requires compiled extensions + +Files +----- + +``orca_common.py`` + Shared protocol parsing, unit conversion, and job evaluation logic. + +``metatomic-orca-external`` + Standalone script invoked by ORCA via ``%method ProgExt``. Reloads the model + on every ORCA call. + +``metatomic-orca-server`` + Persistent HTTP server that keeps the model resident in memory. + +``metatomic-orca-client`` + Thin ORCA-facing client that forwards jobs to ``metatomic-orca-server``. + +``water_opt/water.xyz`` + Starting water geometry for a test optimization. + +``water_opt/water_opt.inp`` + ORCA input template using the server/client setup. Edit paths before running. 
+ +Recommended setup (server/client) +--------------------------------- + +For production workflows (geometry optimization, NEB, GOAT), start a persistent +server so ORCA does not reload the PyTorch model on every energy/gradient call. + +1. Install metatomic and metatomic-ase in the Python environment ORCA will use. + +2. Start the server in one terminal (use ``--warmup`` to load the model + immediately):: + + metatomic-orca-server \ + --model /path/to/model-md.pt \ + --extensions-directory /path/to/extensions \ + --device cuda \ + --warmup + + You can also set ``METATOMIC_MODEL``, ``METATOMIC_EXTENSIONS``, and + ``METATOMIC_DEVICE`` instead of passing flags. + +3. Edit ``water_opt/water_opt.inp``: + + - ``ProgExt`` must point to ``metatomic-orca-client`` (absolute path) + - ``Ext_Params`` should pass ``-b hostname:port`` if not using the default + ``127.0.0.1:8888`` + + Example:: + + %method + ProgExt "/home/user/metatomic/python/examples/orca/metatomic-orca-client" + Ext_Params "-b 127.0.0.1:8888" + end + + Model paths are configured on the server. To override per job, add + ``--model`` / ``--extensions-directory`` to ``Ext_Params``. + +4. Run ORCA from the example directory:: + + cd water_opt + orca water_opt.inp > job.out + +Standalone mode +--------------- + +For quick tests, ORCA can call ``metatomic-orca-external`` directly:: + + %method + ProgExt "/home/user/metatomic/python/examples/orca/metatomic-orca-external" + Ext_Params "--model /home/user/models/model-md.pt --extensions-directory /home/user/models/extensions" + end + +Each ORCA step starts a new Python process and reloads the model, which is +simple but slow for long optimizations. + +Parallelism: ORCA PAL, ``NCores``, and PyTorch threading +-------------------------------------------------------- + +ORCA does **not** parallelize the external program for you. It only reports how +many cores were allocated in each ``*.extinp.tmp`` file as ``NCores``. 
The +wrapper reads that value and configures CPU threading before each evaluation: + +- ``torch.set_num_threads(NCores)`` +- ``OMP_NUM_THREADS``, ``MKL_NUM_THREADS``, ``OPENBLAS_NUM_THREADS``, and related + variables set to ``NCores`` + +Set ``METATOMIC_DISABLE_THREADING_CONFIG=1`` if you prefer to manage these +variables yourself (for example in your job scheduler script). Any non-empty value (even ``0``) disables the automatic configuration; unset the variable to re-enable it. + +Matching ORCA and metatomic resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For a single geometry optimization (one external call at a time), choose PAL so +that ``NCores`` matches the CPU cores you want PyTorch to use: + +.. code-block:: text + + ! ExtOpt Opt PAL4 + + %pal + nprocs 4 + end + + %method + ProgExt "/path/to/metatomic-orca-client" + Ext_Params "-b 127.0.0.1:8888" + end + +For multi-image workflows (NEB, GOAT, numerical frequencies), ORCA can run +several external calls in parallel. Use ``nprocs_group`` to set how many cores +each external call receives (see the `ORCA parallel manual`_): + +.. code-block:: text + + ! ExtOpt NEB-CI PAL8 + + %pal + nprocs 8 + nprocs_group 4 + end + +Here ORCA may launch two external evaluations at once, each with ``NCores = 4``. +Start **one** ``metatomic-orca-server`` per distinct ``NCores``/GPU combination, +or run standalone wrappers and let the scheduler place one process per core +group. Avoid oversubscribing: if ``NCores = 4``, do not let multiple concurrent +wrapper processes each spawn 4 OpenMP threads on the same 4 physical cores. A single server evaluates requests one at a time, so concurrent calls sent to the same server are queued rather than run in parallel. + +GPU evaluation +~~~~~~~~~~~~~~ + +For GPU inference, start the server with ``--device cuda`` (or set +``METATOMIC_DEVICE=cuda``). ORCA ``NCores`` then mainly controls CPU-side work +such as neighbor-list construction; the model forward pass runs on the GPU +selected by ``CUDA_VISIBLE_DEVICES`` on the server host. + +Example server startup on a GPU node: + +..
code-block:: bash + + export CUDA_VISIBLE_DEVICES=0 + metatomic-orca-server \ + --model /path/to/model-md.pt \ + --extensions-directory /path/to/extensions \ + --device cuda \ + --warmup + +Keep ORCA ``PAL``/``NCores`` modest on GPU nodes unless CPU neighbor builds are +the bottleneck. A practical starting point is ``PAL1`` or ``PAL2`` for the +external call when using GPU inference. + +.. _ORCA parallel manual: https://www.faccts.de/docs/orca/6.1/manual/contents/essentialelements/parallel.html + +Expected outputs +---------------- + +- ``water_opt.engrad`` — energy and gradient written each step +- ``water_opt.xyz`` — final optimized geometry +- ``water_opt_trj.xyz`` — optimization trajectory (if ORCA writes it) + +Standalone test (without ORCA) +------------------------------ + +Smoke-test the standalone wrapper if ORCA has already created an +``*.extinp.tmp`` file, or craft one following the `interface specification`_:: + + ./metatomic-orca-external water_opt_EXT.extinp.tmp \ + --model /path/to/model-md.pt \ + --extensions-directory /path/to/extensions + +Test the client against a running server (start the server in the background or in a separate terminal, since it runs until interrupted):: + + metatomic-orca-server --model /path/to/model-md.pt --warmup & + metatomic-orca-client -b 127.0.0.1:8888 water_opt_EXT.extinp.tmp + +.. _interface specification: https://github.com/faccts/orca-external-tools#interface + +Troubleshooting +--------------- + +**ORCA cannot find the script** + Use an absolute path in ``ProgExt``. ORCA's working directory may differ + from where you launch the job. + +**Connection error from the client** + Ensure ``metatomic-orca-server`` is running and that ``-b`` matches the + server bind address. + +**Model or extensions not found** + Pass absolute paths to ``metatomic-orca-server``, or set ``METATOMIC_MODEL`` + and ``METATOMIC_EXTENSIONS``. + +**Point charges** + ORCA point-charge files (``pointcharges.pc``) are not supported in this + version: the wrapper emits a warning and ignores them.
+ +**CPU oversubscription / slow runs** + Check that ORCA ``PAL``/``NCores`` matches the threading configured by the + wrapper. Use ``METATOMIC_DISABLE_THREADING_CONFIG=1`` only when setting + ``OMP_NUM_THREADS``/``MKL_NUM_THREADS`` manually. + +Related +------- + +- `metatomic issue #228`_ +- `ORCA external optimizer tutorial`_ +- `orca-external-tools`_ + +.. _metatomic issue #228: https://github.com/metatensor/metatomic/issues/228 +.. _ORCA external optimizer tutorial: https://www.faccts.de/docs/orca/6.1/tutorials/workflows/extopt.html diff --git a/python/examples/orca/metatomic-orca-client b/python/examples/orca/metatomic-orca-client new file mode 100755 index 000000000..687eefb29 --- /dev/null +++ b/python/examples/orca/metatomic-orca-client @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +"""ORCA client that forwards external-tool jobs to a persistent Metatomic server.""" + +from __future__ import annotations + +import json +import os +import sys +import traceback +import urllib.error +import urllib.request +from argparse import ArgumentParser + +DEFAULT_BIND = "127.0.0.1:8888" + + +def send_to_server(host_port: str, arguments: list[str], *, working_directory: str) -> None: + """Forward a calculation request to ``metatomic-orca-server``.""" + host, port = host_port.split(":", 1) + url = f"http://{host}:{port}/calculate" + payload = {"arguments": arguments, "directory": working_directory} + request = urllib.request.Request( + url, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST", + ) + + try: + with urllib.request.urlopen(request, timeout=None) as response: + data = json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + print(f"HTTP error {exc.code}: {body}", file=sys.stderr) + raise SystemExit(1) from exc + except urllib.error.URLError as exc: + print(f"Connection error: {exc}", file=sys.stderr) + raise SystemExit(1) from exc 
+ except Exception as exc: + print(f"Unexpected error: {type(exc).__name__}: {exc}", file=sys.stderr) + traceback.print_exc() + raise SystemExit(1) from exc + + print(data.get("stdout", ""), end="") + if data.get("status") != "Success": + print( + f"Server error {data.get('error_type')}: {data.get('error_message')}.", + file=sys.stderr, + ) + if data.get("traceback"): + print(data["traceback"], file=sys.stderr) + raise SystemExit(1) + + +def build_client_parser() -> ArgumentParser: + parser = ArgumentParser( + prog="metatomic-orca-client", + description="Forward ORCA external-tool jobs to a running metatomic-orca-server.", + ) + parser.add_argument( + "-b", + "--bind", + metavar="hostname:port", + default=DEFAULT_BIND, + dest="host_port", + help=f"Server bind address and port. Default: {DEFAULT_BIND}.", + ) + return parser + + +def main(argv: list[str] | None = None) -> None: + parser = build_client_parser() + args, remaining_args = parser.parse_known_args(argv) + send_to_server( + args.host_port, + remaining_args, + working_directory=os.getcwd(), + ) + + +if __name__ == "__main__": + main() diff --git a/python/examples/orca/metatomic-orca-external b/python/examples/orca/metatomic-orca-external new file mode 100755 index 000000000..d479bc389 --- /dev/null +++ b/python/examples/orca/metatomic-orca-external @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Metatomic ML potential wrapper for ORCA's external-tool interface.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +EXAMPLE_DIR = Path(__file__).resolve().parent +if str(EXAMPLE_DIR) not in sys.path: + sys.path.insert(0, str(EXAMPLE_DIR)) + +from orca_common import ( # noqa: E402 + build_runner_parser, + run_orca_job, + settings_from_namespace, +) + + +def main(argv: list[str] | None = None) -> None: + parser = build_runner_parser( + prog="metatomic-orca-external", + description="Metatomic ML potential wrapper for ORCA's external-tool interface.", + ) + args = parser.parse_args(argv) + 
try: + settings = settings_from_namespace(args) + run_orca_job(args.inputfile, settings) + except (ValueError, FileNotFoundError) as exc: + raise SystemExit(str(exc)) from exc + + +if __name__ == "__main__": + main() diff --git a/python/examples/orca/metatomic-orca-server b/python/examples/orca/metatomic-orca-server new file mode 100755 index 000000000..ffe9f1f2c --- /dev/null +++ b/python/examples/orca/metatomic-orca-server @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +"""Persistent server for Metatomic ORCA external-tool calculations.""" + +from __future__ import annotations + +import argparse +import io +import json +import logging +import os +import sys +import threading +import traceback +from contextlib import redirect_stdout +from dataclasses import dataclass, field +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from typing import Any + +EXAMPLE_DIR = Path(__file__).resolve().parent +if str(EXAMPLE_DIR) not in sys.path: + sys.path.insert(0, str(EXAMPLE_DIR)) + +from orca_common import ( # noqa: E402 + MetatomicOrcaSettings, + OrcaPreparedJob, + add_model_arguments, + get_calculator, + prepare_orca_job_from_arguments, + run_prepared_jobs, + settings_from_namespace, +) + +DEFAULT_BIND = "127.0.0.1:8888" + + +@dataclass +class _PendingRequest: + prepared: OrcaPreparedJob + done: threading.Event = field(default_factory=threading.Event) + result: dict[str, Any] | None = None + + +class MetatomicOrcaServer: + """Handle ORCA external-tool requests with a resident model in memory.""" + + def __init__(self, default_settings: MetatomicOrcaSettings | None = None) -> None: + self.default_settings = default_settings + self._queue: list[_PendingRequest] = [] + self._queue_cond = threading.Condition() + self._shutdown = False + self._worker = threading.Thread( + target=self._worker_loop, + name="metatomic-orca-worker", + daemon=True, + ) + self._worker.start() + + def shutdown(self) -> None: + """Stop the background worker.""" 
+ with self._queue_cond: + self._shutdown = True + self._queue_cond.notify_all() + self._worker.join(timeout=5.0) + + def warmup(self) -> None: + """Load the default model into the process cache.""" + if self.default_settings is None: + return + get_calculator(self.default_settings) + + def handle(self, arguments: list[str], directory: str) -> dict[str, Any]: + """Enqueue a request and block until the engrad file is written.""" + try: + prepared = prepare_orca_job_from_arguments( + arguments, + directory, + default_settings=self.default_settings, + ) + except Exception as exc: + return { + "status": "Error", + "error_message": str(exc), + "error_type": type(exc).__name__, + "traceback": traceback.format_exc(), + } + + pending = _PendingRequest(prepared=prepared) + with self._queue_cond: + self._queue.append(pending) + self._queue_cond.notify() + pending.done.wait() + assert pending.result is not None + return pending.result + + def _worker_loop(self) -> None: + while True: + with self._queue_cond: + while not self._shutdown and not self._queue: + self._queue_cond.wait() + if self._shutdown and not self._queue: + return + pending = self._queue.pop(0) + + try: + buf = io.StringIO() + with redirect_stdout(buf): + run_prepared_jobs([pending.prepared]) + pending.result = {"status": "Success", "stdout": buf.getvalue()} + except Exception as exc: + pending.result = { + "status": "Error", + "error_message": str(exc), + "error_type": type(exc).__name__, + "traceback": traceback.format_exc(), + } + finally: + pending.done.set() + + +def create_handler(server: MetatomicOrcaServer) -> type[BaseHTTPRequestHandler]: + class Handler(BaseHTTPRequestHandler): + server_version = "metatomic-orca-server/1.0" + + def log_message(self, format: str, *args) -> None: + logging.info("%s - %s", self.address_string(), format % args) + + def _send_json(self, status_code: int, payload: dict[str, Any]) -> None: + body = json.dumps(payload).encode("utf-8") + self.send_response(status_code) + 
self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self) -> None: + if self.path == "/healthz": + self._send_json(200, {"status": "OK"}) + return + self._send_json(404, {"status": "Error", "error_message": "Not found"}) + + def do_POST(self) -> None: + if self.path != "/calculate": + self._send_json(404, {"status": "Error", "error_message": "Not found"}) + return + + try: + length = int(self.headers.get("Content-Length", "0")) + raw = self.rfile.read(length) + payload = json.loads(raw.decode("utf-8")) + arguments = payload["arguments"] + directory = payload["directory"] + except (KeyError, TypeError, ValueError, json.JSONDecodeError) as exc: + self._send_json( + 400, + { + "status": "Error", + "error_message": f"Invalid request payload: {exc}", + "error_type": type(exc).__name__, + }, + ) + return + + try: + result = server.handle(arguments, directory) + status_code = 200 if result.get("status") == "Success" else 500 + self._send_json(status_code, result) + except Exception as exc: + self._send_json( + 500, + { + "status": "Error", + "error_message": str(exc), + "error_type": type(exc).__name__, + "traceback": traceback.format_exc(), + }, + ) + + return Handler + + +def build_server_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="metatomic-orca-server", + description=( + "Start a persistent Metatomic server for ORCA external-tool calculations. " + "Point ORCA ProgExt at metatomic-orca-client." + ), + ) + parser.add_argument( + "-b", + "--bind", + metavar="hostname:port", + default=DEFAULT_BIND, + dest="host_port", + help=f"Server bind address and port. 
Default: {DEFAULT_BIND}.", + ) + parser.add_argument( + "--warmup", + action="store_true", + help="Load the default model at startup (recommended).", + ) + add_model_arguments(parser) + return parser + + +def main(argv: list[str] | None = None) -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + parser = build_server_parser() + args = parser.parse_args(argv) + + default_settings: MetatomicOrcaSettings | None = None + if args.model or os.environ.get("METATOMIC_MODEL"): + default_settings = settings_from_namespace(args) + + server = MetatomicOrcaServer(default_settings=default_settings) + if args.warmup: + logging.info("Warming up metatomic model...") + server.warmup() + + host, port = args.host_port.split(":", 1) + handler = create_handler(server) + httpd = ThreadingHTTPServer((host, int(port)), handler) + logging.info("metatomic-orca-server listening on %s:%s", host, port) + try: + httpd.serve_forever() + except KeyboardInterrupt: + logging.info("Shutting down metatomic-orca-server...") + finally: + server.shutdown() + httpd.server_close() + + +if __name__ == "__main__": + main() diff --git a/python/examples/orca/orca_common.py b/python/examples/orca/orca_common.py new file mode 100644 index 000000000..15167a928 --- /dev/null +++ b/python/examples/orca/orca_common.py @@ -0,0 +1,453 @@ +"""Shared helpers for Metatomic ORCA external-tool scripts.""" + +from __future__ import annotations + +import os +import warnings +from argparse import ArgumentParser, Namespace +from dataclasses import dataclass +from pathlib import Path + +import ase.units +import numpy as np +from ase import Atoms +from ase.data import atomic_numbers +from metatomic_ase import MetatomicCalculator + + +@dataclass(frozen=True) +class ExtInpData: + """Parsed contents of an ORCA ``*.extinp.tmp`` file.""" + + xyz_path: Path + charge: int + multiplicity: int + ncores: int + do_gradient: bool + pointcharges_path: Path | None = None + + +@dataclass(frozen=True) 
+class MetatomicOrcaSettings: + model: Path + extensions_directory: Path | None = None + device: str | None = None + + +@dataclass(frozen=True) +class OrcaPreparedJob: + """Parsed ORCA external-tool job ready for evaluation.""" + + input_path: Path + extinp: ExtInpData + settings: MetatomicOrcaSettings + atoms: Atoms + + +def _first_field(line: str) -> str: + """Return the first whitespace-separated token, ignoring ``#`` comments.""" + stripped = line.split("#", 1)[0].strip() + if not stripped: + return "" + return stripped.split()[0] + + +def read_extinp(inputfile: str | Path) -> ExtInpData: + """Parse an ORCA external-tool input file.""" + input_path = Path(inputfile) + if not input_path.is_file(): + raise FileNotFoundError(f"Input file not found: {input_path}") + + fields: list[str] = [] + with input_path.open() as handle: + for line in handle: + token = _first_field(line) + if token: + fields.append(token) + + if len(fields) < 5: + raise ValueError(f"ORCA extinp file has too few fields: {input_path}") + + xyz_name = fields[0] + charge = int(fields[1]) + multiplicity = int(fields[2]) + ncores = int(fields[3]) + do_gradient_flag = int(fields[4]) + + if do_gradient_flag not in (0, 1): + raise ValueError("do_gradient from ORCA input must be 0 or 1.") + if multiplicity < 1: + raise ValueError("Multiplicity must be a positive integer.") + if ncores < 1: + raise ValueError("NCores must be a positive integer.") + + xyz_path = Path(xyz_name) + if not xyz_path.is_absolute(): + xyz_path = (input_path.parent / xyz_path).resolve() + + pointcharges_path = None + if len(fields) >= 6: + pc_name = fields[5] + pointcharges_path = Path(pc_name) + if not pointcharges_path.is_absolute(): + pointcharges_path = (input_path.parent / pointcharges_path).resolve() + + return ExtInpData( + xyz_path=xyz_path, + charge=charge, + multiplicity=multiplicity, + ncores=ncores, + do_gradient=bool(do_gradient_flag), + pointcharges_path=pointcharges_path, + ) + + +def read_xyz( + xyz_file: str | 
Path, +) -> tuple[list[str], list[tuple[float, float, float]]]: + """Read element symbols and Cartesian coordinates (Angstrom) from XYZ.""" + symbols: list[str] = [] + coordinates: list[tuple[float, float, float]] = [] + xyz_path = Path(xyz_file) + with xyz_path.open() as handle: + natoms = int(handle.readline().strip()) + handle.readline() # comment line + for _ in range(natoms): + line = handle.readline() + if not line: + break + parts = line.split() + symbols.append(parts[0]) + coordinates.append((float(parts[1]), float(parts[2]), float(parts[3]))) + return symbols, coordinates + + +def write_engrad( + filename: str | Path, + natoms: int, + energy_hartree: float, + gradient_hartree_bohr: list[float] | None = None, +) -> None: + """Write ORCA ``*.engrad`` output (energy in Eh, gradient in Eh/bohr).""" + output_path = Path(filename) + lines = [ + "#", + "# Number of atoms", + "#", + f"{natoms}", + "#", + "# Total energy [Eh]", + "#", + f"{energy_hartree:.12e}", + ] + if gradient_hartree_bohr: + lines.extend( + [ + "#", + "# Gradient [Eh/Bohr] A1X, A1Y, A1Z, A2X, ...", + "#", + ] + ) + lines.extend(f"{value: .12e}" for value in gradient_hartree_bohr) + + try: + output_path.write_text("\n".join(lines) + "\n") + except OSError as exc: + raise RuntimeError(f"Failed to write ORCA output file {output_path}: {exc}") from exc + + +_CALCULATOR_CACHE: dict[tuple[str, str | None, str | None], MetatomicCalculator] = {} +_CONFIGURED_NCORES: int | None = None + +_THREAD_ENV_VARS = ( + "OMP_NUM_THREADS", + "MKL_NUM_THREADS", + "OPENBLAS_NUM_THREADS", + "VECLIB_MAXIMUM_THREADS", + "NUMEXPR_NUM_THREADS", +) + + +def configure_cpu_threading(ncores: int) -> int: + """Configure PyTorch and BLAS CPU threading from ORCA's ``NCores`` field. + + ORCA passes the number of cores allocated to the external program in each + ``*.extinp.tmp`` file. This helper maps that value to ``torch`` intra-op + threads and common BLAS/OpenMP environment variables. 
+ + Set ``METATOMIC_DISABLE_THREADING_CONFIG=1`` to skip automatic configuration + (for example when you manage ``OMP_NUM_THREADS`` yourself). + """ + ncores = max(1, int(ncores)) + if os.environ.get("METATOMIC_DISABLE_THREADING_CONFIG"): + return ncores + + global _CONFIGURED_NCORES + if _CONFIGURED_NCORES == ncores: + return ncores + + for var in _THREAD_ENV_VARS: + os.environ[var] = str(ncores) + + try: + import torch + except ImportError: + torch = None + + if torch is not None: + torch.set_num_threads(ncores) + interop_threads = min(ncores, 2) + try: + torch.set_num_interop_threads(interop_threads) + except RuntimeError: + # PyTorch allows setting interop threads only once per process. + pass + + _CONFIGURED_NCORES = ncores + return ncores + + +def reset_threading_config() -> None: + """Reset cached threading configuration (useful in tests).""" + global _CONFIGURED_NCORES + _CONFIGURED_NCORES = None + + +def settings_cache_key(settings: MetatomicOrcaSettings) -> tuple[str, str | None, str | None]: + extensions = ( + str(settings.extensions_directory.resolve()) + if settings.extensions_directory is not None + else None + ) + return (str(settings.model.resolve()), extensions, settings.device) + + +def get_calculator(settings: MetatomicOrcaSettings) -> MetatomicCalculator: + """Return a cached Metatomic calculator for the given settings.""" + key = settings_cache_key(settings) + cached = _CALCULATOR_CACHE.get(key) + if cached is not None: + return cached + + extensions = key[1] + calculator = MetatomicCalculator( + str(settings.model), + extensions_directory=extensions, + device=settings.device, + ) + _CALCULATOR_CACHE[key] = calculator + return calculator + + +def clear_calculator_cache() -> None: + """Drop cached calculators (useful in tests).""" + _CALCULATOR_CACHE.clear() + reset_threading_config() + + +def atoms_from_xyz(xyz_file: str | Path) -> Atoms: + """Build an ASE ``Atoms`` object from an ORCA XYZ file.""" + symbols, coordinates = read_xyz(xyz_file) + 
numbers = [atomic_numbers[symbol.capitalize()] for symbol in symbols] + return Atoms(numbers=numbers, positions=np.asarray(coordinates, dtype=float)) + + +def atoms_from_extinp(extinp: ExtInpData) -> Atoms: + """Build an ASE ``Atoms`` object from parsed ORCA external-tool input.""" + atoms = atoms_from_xyz(extinp.xyz_path) + atoms.info["charge"] = extinp.charge + atoms.info["spin_multiplicity"] = extinp.multiplicity + return atoms + + +def forces_to_orca_gradient(forces_ev_angstrom: np.ndarray) -> np.ndarray: + """Convert ASE forces (eV/Å) to ORCA gradients (Eh/bohr).""" + gradient_ev_angstrom = -np.asarray(forces_ev_angstrom, dtype=float) + return (gradient_ev_angstrom * ase.units.Bohr / ase.units.Hartree).reshape(-1) + + +def evaluate_structure( + atoms: Atoms, + calculator: MetatomicCalculator, + *, + do_gradient: bool, +) -> tuple[float, list[float]]: + """Evaluate energy (Eh) and optional gradient (Eh/bohr) for ``atoms``.""" + atoms.calc = calculator + energy_hartree = float(atoms.get_potential_energy() / ase.units.Hartree) + + gradient: list[float] = [] + if do_gradient: + forces = atoms.get_forces() + gradient = forces_to_orca_gradient(forces).tolist() + + return energy_hartree, gradient + + +def _warn_unsupported_extinp_fields(extinp: ExtInpData) -> None: + if extinp.pointcharges_path is not None: + warnings.warn( + "Metatomic ORCA external tool does not incorporate ORCA point charges.", + UserWarning, + stacklevel=2, + ) + + +def add_model_arguments(parser: ArgumentParser) -> None: + """Register model-related flags on ``parser``.""" + parser.add_argument( + "--model", + default=os.environ.get("METATOMIC_MODEL"), + help="Path to an exported metatomic model (.pt). Can also be set with METATOMIC_MODEL.", + ) + parser.add_argument( + "--extensions-directory", + default=os.environ.get("METATOMIC_EXTENSIONS"), + help=( + "Directory containing compiled model extensions. " + "Can also be set with METATOMIC_EXTENSIONS." 
+ ), + ) + parser.add_argument( + "--device", + default=os.environ.get("METATOMIC_DEVICE"), + help="Torch device for model evaluation (e.g. cpu, cuda).", + ) + + +def settings_from_namespace( + args: Namespace, + *, + default_settings: MetatomicOrcaSettings | None = None, +) -> MetatomicOrcaSettings: + """Build settings from parsed CLI args, optionally inheriting server defaults.""" + model_value = args.model + if not model_value and default_settings is not None: + model_value = str(default_settings.model) + + if not model_value: + raise ValueError( + "Missing model path. Pass --model PATH, set METATOMIC_MODEL, " + "or start the server with a default model." + ) + + model = Path(model_value).expanduser().resolve() + if not model.is_file(): + raise FileNotFoundError(f"Model file not found: {model}") + + extensions_value = args.extensions_directory + if not extensions_value and default_settings is not None: + extensions_value = ( + str(default_settings.extensions_directory) + if default_settings.extensions_directory is not None + else None + ) + + extensions_directory = None + if extensions_value: + extensions_directory = Path(extensions_value).expanduser().resolve() + if not extensions_directory.is_dir(): + raise FileNotFoundError( + f"Extensions directory not found: {extensions_directory}" + ) + + device = args.device + if device is None and default_settings is not None: + device = default_settings.device + + return MetatomicOrcaSettings( + model=model, + extensions_directory=extensions_directory, + device=device, + ) + + +def build_runner_parser(*, prog: str, description: str) -> ArgumentParser: + """CLI argument parser for standalone ORCA external-tool runs.""" + parser = ArgumentParser(prog=prog, description=description) + parser.add_argument("inputfile", help="ORCA *.extinp.tmp file") + add_model_arguments(parser) + return parser + + +def parse_runner_arguments( + arguments: list[str] | None = None, + *, + default_settings: MetatomicOrcaSettings | None = None, 
+) -> tuple[Path, MetatomicOrcaSettings]: + """Parse ORCA/client argument vectors into an input path and settings.""" + parser = build_runner_parser( + prog="metatomic-orca-external", + description="Metatomic ML potential wrapper for ORCA's external-tool interface.", + ) + args = parser.parse_args(arguments) + settings = settings_from_namespace(args, default_settings=default_settings) + return Path(args.inputfile), settings + + +def prepare_orca_job( + inputfile: str | Path, + *, + settings: MetatomicOrcaSettings, +) -> OrcaPreparedJob: + """Parse an ORCA external-tool input file into a prepared job.""" + input_path = Path(inputfile).resolve() + extinp = read_extinp(input_path) + _warn_unsupported_extinp_fields(extinp) + + if not extinp.xyz_path.is_file(): + raise FileNotFoundError(f"XYZ file not found: {extinp.xyz_path}") + + return OrcaPreparedJob( + input_path=input_path, + extinp=extinp, + settings=settings, + atoms=atoms_from_extinp(extinp), + ) + + +def prepare_orca_job_from_arguments( + arguments: list[str], + directory: str, + *, + default_settings: MetatomicOrcaSettings | None = None, +) -> OrcaPreparedJob: + """Parse an ORCA/client argument vector into a prepared job.""" + working_dir = Path(directory).resolve() + if not working_dir.is_dir(): + raise ValueError(f"Invalid directory: {working_dir}") + + inputfile, settings = parse_runner_arguments( + arguments, + default_settings=default_settings, + ) + return prepare_orca_job(working_dir / inputfile, settings=settings) + + +def run_prepared_jobs(jobs: list[OrcaPreparedJob]) -> list[Path]: + """Evaluate one or more prepared ORCA jobs.""" + engrad_paths: list[Path] = [] + for job in jobs: + configure_cpu_threading(job.extinp.ncores) + calculator = get_calculator(job.settings) + energy, gradient = evaluate_structure( + job.atoms, + calculator, + do_gradient=job.extinp.do_gradient, + ) + basename = job.extinp.xyz_path.name.removesuffix(".xyz") + engrad_path = job.input_path.parent / f"{basename}.engrad" + 
write_engrad( + engrad_path, + natoms=len(job.atoms), + energy_hartree=energy, + gradient_hartree_bohr=gradient or None, + ) + engrad_paths.append(engrad_path) + return engrad_paths + + +def run_orca_job(inputfile: str | Path, settings: MetatomicOrcaSettings) -> Path: + """Parse ``inputfile``, evaluate the structure, and write ``*.engrad``.""" + job = prepare_orca_job(inputfile, settings=settings) + return run_prepared_jobs([job])[0] diff --git a/python/examples/orca/test_protocol.py b/python/examples/orca/test_protocol.py new file mode 100644 index 000000000..5f4e16462 --- /dev/null +++ b/python/examples/orca/test_protocol.py @@ -0,0 +1,276 @@ +"""Smoke tests for the ORCA external-tool file protocol (no ORCA required).""" + +from __future__ import annotations + +import importlib.util +import json +import os +import sys +import threading +import urllib.request +from http.server import ThreadingHTTPServer +from importlib.machinery import SourceFileLoader +from pathlib import Path +from unittest.mock import MagicMock + +import ase.units +import numpy as np +import pytest +from ase.calculators.calculator import Calculator, all_changes + +EXAMPLE_DIR = Path(__file__).resolve().parent +COMMON_MODULE = EXAMPLE_DIR / "orca_common.py" +SERVER_MODULE = EXAMPLE_DIR / "metatomic-orca-server" +CLIENT_MODULE = EXAMPLE_DIR / "metatomic-orca-client" + + +def load_module(path: Path, name: str): + mock_metatomic_ase = MagicMock() + sys.modules.setdefault("metatomic_ase", mock_metatomic_ase) + + loader = SourceFileLoader(name, str(path)) + spec = importlib.util.spec_from_loader(name, loader) + assert spec is not None + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + loader.exec_module(module) + return module + + +@pytest.fixture +def orca(): + module = load_module(COMMON_MODULE, "orca_common") + module.clear_calculator_cache() + yield module + module.clear_calculator_cache() + + +@pytest.fixture +def orca_server(orca): + module = 
def test_read_extinp_and_xyz(tmp_path, orca):
    """Read back the water job files and check the parsed fields."""
    # Reuse the shared helper rather than repeating the XYZ and extinp
    # contents inline; it writes ``water.xyz`` next to the extinp file.
    extinp_path = _write_water_job(tmp_path)
    xyz_path = tmp_path / "water.xyz"

    extinp = orca.read_extinp(extinp_path)
    assert extinp.charge == 0
    assert extinp.multiplicity == 1
    assert extinp.do_gradient is True
    assert extinp.xyz_path == xyz_path.resolve()

    symbols, coords = orca.read_xyz(xyz_path)
    assert symbols == ["O", "H", "H"]
    assert len(coords) == 3
def test_forces_to_orca_gradient(orca):
    """Compare ``forces_to_orca_gradient`` with the formula written out in ASE units."""
    sample_forces = np.array([[0.1, 0.0, 0.0], [0.0, 0.2, 0.0]])
    converted = orca.forces_to_orca_gradient(sample_forces)

    # Reference value: sign-flipped forces scaled by Bohr / Hartree, flattened.
    reference = (-sample_forces * ase.units.Bohr / ase.units.Hartree).reshape(-1)
    np.testing.assert_allclose(converted, reference)
def test_server_handles_job(tmp_path, orca, orca_server, monkeypatch):
    """Run one water job through ``MetatomicOrcaServer.handle`` with a stub calculator."""
    job_file = _write_water_job(tmp_path)

    model_file = tmp_path / "fake.pt"
    model_file.write_text("placeholder")
    server_settings = orca.MetatomicOrcaSettings(model=model_file)

    energy_ev = -13.6
    stub_calculator = FixedEnergyCalculator(energy_ev, np.zeros((3, 3)))

    def _stub_get_calculator(_settings):
        # Always hand back the same pre-built stub.
        return stub_calculator

    monkeypatch.setattr(orca, "get_calculator", _stub_get_calculator)

    server = orca_server.MetatomicOrcaServer(default_settings=server_settings)
    try:
        response = server.handle([job_file.name], str(tmp_path))
        assert response["status"] == "Success"
        engrad_file = tmp_path / "water.engrad"
        assert engrad_file.is_file()
    finally:
        server.shutdown()
def test_healthz_endpoint(tmp_path, orca, orca_server, monkeypatch):
    """Serve the handler over HTTP and check the JSON returned by ``/healthz``."""
    model_file = tmp_path / "fake.pt"
    model_file.write_text("placeholder")
    server_settings = orca.MetatomicOrcaSettings(model=model_file)

    def _stub_get_calculator(_settings):
        # Build a new stub calculator on every call.
        return FixedEnergyCalculator(0.0, np.zeros((1, 3)))

    monkeypatch.setattr(orca, "get_calculator", _stub_get_calculator)

    server = orca_server.MetatomicOrcaServer(default_settings=server_settings)
    request_handler = orca_server.create_handler(server)
    # Port 0 asks the OS for a free port; the bound address is read back below.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), request_handler)
    host, port = httpd.server_address
    serve_thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    serve_thread.start()

    try:
        url = f"http://{host}:{port}/healthz"
        with urllib.request.urlopen(url, timeout=5) as response:
            payload = json.loads(response.read().decode("utf-8"))
        assert payload == {"status": "OK"}
    finally:
        server.shutdown()
        httpd.shutdown()
        httpd.server_close()
        serve_thread.join(timeout=2.0)