diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 38d0b6fbec60..0659e70598bb 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -1,20 +1,20 @@ #!/usr/bin/env python +"""Build the native XGBoost4J JNI library.""" + import argparse -import errno -import glob import os import platform +import shlex import shutil import subprocess import sys -from contextlib import contextmanager - -# Monkey-patch the API inconsistency between Python2.X and 3.X. -if sys.platform.startswith("linux"): - sys.platform = "linux" +from pathlib import Path +from typing import Sequence +ROOT = Path(__file__).resolve().parents[1] +JVM_PACKAGES = Path(__file__).resolve().parent -CONFIG = { +DEFAULT_CONFIG = { "USE_OPENMP": "ON", "USE_CUDA": "OFF", "USE_NCCL": "OFF", @@ -24,121 +24,121 @@ } -@contextmanager -def cd(path): - path = normpath(path) - cwd = os.getcwd() - os.chdir(path) - print("cd " + path, flush=True) - try: - yield path - finally: - os.chdir(cwd) +def run(command: Sequence[str], *, cwd: Path | None = None) -> None: + """Run a shell command.""" + print(shlex.join(command), flush=True) + subprocess.run(command, cwd=cwd, check=True, env=os.environ) -def maybe_makedirs(path): - path = normpath(path) - print("mkdir -p " + path, flush=True) - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise +def mkdir(path: Path) -> None: + """Create a directory if it does not already exist.""" + print(f"mkdir -p {path}", flush=True) + path.mkdir(parents=True, exist_ok=True) -def run(command, **kwargs): - print(command, flush=True) - subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs) +def copy_file(source: Path, target: Path) -> None: + """Copy a file to a target path or directory.""" + print(f"cp {source} {target}", flush=True) + shutil.copy(source, target) -def cp(source, target): - source = normpath(source) - target = normpath(target) - print("cp {0} {1}".format(source, target), flush=True) - shutil.copy(source, target) +def copy_glob(pattern: str, target: Path) -> None: + """Copy files matching a glob pattern to a target directory.""" + for source in ROOT.glob(pattern): + copy_file(source, target) -def normpath(path): - """Normalize UNIX path to a native path.""" - normalized = os.path.join(*path.split("/")) - if os.path.isabs(path): - return os.path.abspath("/") + normalized - else: - return normalized +def cmake_config(options: argparse.Namespace) -> dict[str, str]: + """Create CMake configuration from CLI options.""" + config = DEFAULT_CONFIG.copy() + config["USE_OPENMP"] = options.use_openmp + config["USE_NVTX"] = options.use_nvtx + config["PLUGIN_RMM"] = options.plugin_rmm + if options.log_capi_invocation == "ON": + config["LOG_CAPI_INVOCATION"] = "ON" + if options.use_debug == "ON": + config["CMAKE_BUILD_TYPE"] = "Debug" + if options.use_cuda == "ON": + config["USE_CUDA"] = "ON" + config["USE_NCCL"] = "ON" + config["USE_DLOPEN_NCCL"] = "OFF" -def native_build(cli_args: argparse.Namespace) -> None: - CONFIG["USE_OPENMP"] = cli_args.use_openmp - if sys.platform == "darwin": - os.environ["JAVA_HOME"] = ( - subprocess.check_output("/usr/libexec/java_home").strip().decode() - ) - if cli_args.use_debug == "ON": - CONFIG["CMAKE_BUILD_TYPE"] = "Debug" - CONFIG["USE_NVTX"] = cli_args.use_nvtx - CONFIG["PLUGIN_RMM"] = cli_args.plugin_rmm + return config - print("building Java wrapper", flush=True) - with cd(".."): - build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build" - maybe_makedirs(build_dir) - - if sys.platform == "linux": - maybe_parallel_build = " -- -j $(nproc)" - elif sys.platform == "win32": - maybe_parallel_build = ' -- /m /nodeReuse:false "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"' - else: - maybe_parallel_build = "" - - if cli_args.log_capi_invocation == "ON": - CONFIG["LOG_CAPI_INVOCATION"] = "ON" - - if cli_args.use_cuda == "ON": - CONFIG["USE_CUDA"] = "ON" - CONFIG["USE_NCCL"] = "ON" - CONFIG["USE_DLOPEN_NCCL"] = "OFF" - - args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()] - if sys.platform != "win32": + +def cmake_args(config: dict[str, str]) -> list[str]: + """Create CMake command line arguments.""" + args = [ + f"-D{k}:BOOL={v}" if v in ("ON", "OFF") else f"-D{k}:STRING={v}" + for k, v in config.items() + ] + + if sys.platform != "win32" and shutil.which("ninja"): + args.append("-GNinja") + + # Set GPU_ARCH_FLAG to override the CUDA architectures. + if gpu_arch_flag := os.getenv("GPU_ARCH_FLAG"): + args.append(f"-DCMAKE_CUDA_ARCHITECTURES={gpu_arch_flag}") + + return args + + +def windows_generators() -> tuple[list[str], ...]: + """Return CMake generator arguments to try on Windows.""" + return ( + [], # Let CMake decide. + ["-G", "Visual Studio 18 2026", "-A", "x64"], + ["-G", "Visual Studio 17 2022", "-A", "x64"], + ["-G", "Visual Studio 16 2019", "-A", "x64"], + ["-G", "Visual Studio 15 2017", "-A", "x64"], + ) + + +def configure(config_args: list[str], build_dir: Path) -> None: + """Configure the CMake build.""" + if sys.platform == "win32": + for generator in windows_generators(): try: - subprocess.check_call(["ninja", "--version"]) - args.append("-GNinja") - except FileNotFoundError: - pass - - # if enviorment set GPU_ARCH_FLAG - gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None) - if gpu_arch_flag is not None: - args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % gpu_arch_flag) - - with cd(build_dir): - lib_dir = os.path.join(os.pardir, "lib") - if os.path.exists(lib_dir): - shutil.rmtree(lib_dir) - - # Same trick as Python build, just test all possible generators. - if sys.platform == "win32": - supported_generators = ( - "", # empty, decided by cmake - '-G"Visual Studio 17 2022" -A x64', - '-G"Visual Studio 16 2019" -A x64', - '-G"Visual Studio 15 2017" -A x64', + run(["cmake", str(ROOT), *config_args, *generator], cwd=build_dir) + return + except subprocess.CalledProcessError as err: + print( + f"Failed to build with generator: {shlex.join(generator)}", + err, + flush=True, ) - for generator in supported_generators: - try: - run("cmake .. " + " ".join(args + [generator])) - break - except subprocess.CalledProcessError as e: - print(f"Failed to build with generator: {generator}", e, flush=True) - with cd(os.path.pardir): - shutil.rmtree(build_dir) - maybe_makedirs(build_dir) - else: - run("cmake .. " + " ".join(args)) - run("cmake --build . --config Release" + maybe_parallel_build) + shutil.rmtree(build_dir) + mkdir(build_dir) + raise RuntimeError("None of the supported CMake generators worked.") + + run(["cmake", str(ROOT), *config_args], cwd=build_dir) + + +def build(config: dict[str, str], build_dir: Path) -> None: + """Build the native library.""" + if (lib_dir := ROOT / "lib").exists(): + shutil.rmtree(lib_dir) + + configure(cmake_args(config), build_dir) + + build_args = ["cmake", "--build", ".", "--config", "Release"] + if sys.platform == "linux": + build_args.extend(["--", "-j", str(os.cpu_count() or 1)]) + elif sys.platform == "win32": + build_args.extend( + [ + "--", + "/m", + "/nodeReuse:false", + "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal", + ] + ) + run(build_args, cwd=build_dir) - print("copying native library", flush=True) +def copy_native_library() -> None: + """Copy the native library into the JVM package resources.""" library_name, os_folder = { "Windows": ("xgboost4j.dll", "windows"), "Darwin": ("libxgboost4j.dylib", "macos"), @@ -154,35 +154,59 @@ def native_build(cli_args: argparse.Namespace) -> None: "arm64": "aarch64", # on macOS & Windows ARM 64-bit "aarch64": "aarch64", }[platform.machine().lower()] - output_folder = "xgboost4j/src/main/resources/lib/{}/{}".format( - os_folder, arch_folder + + output_folder = ( + JVM_PACKAGES / "xgboost4j/src/main/resources/lib" / os_folder / arch_folder ) - maybe_makedirs(output_folder) - cp("../lib/" + library_name, output_folder) + mkdir(output_folder) + copy_file(ROOT / "lib" / library_name, output_folder) + + +def copy_test_resources(*, use_cuda: bool) -> None: + """Copy training data used by JVM package tests.""" + xgboost4j_resources = JVM_PACKAGES / "xgboost4j/src/test/resources" + mkdir(xgboost4j_resources) + copy_glob("demo/data/agaricus.*", xgboost4j_resources) + + xgboost4j_spark_resources = JVM_PACKAGES / "xgboost4j-spark/src/test/resources" + mkdir(xgboost4j_spark_resources) + + regression_dir = ROOT / "demo/data/regression" + run([sys.executable, "mapfeat.py"], cwd=regression_dir) + run([sys.executable, "mknfold.py", "machine.txt", "1"], cwd=regression_dir) + + copy_glob("demo/data/regression/machine.txt.t*", xgboost4j_spark_resources) + copy_glob("demo/data/agaricus.*", xgboost4j_spark_resources) + + if use_cuda: + xgboost4j_spark_gpu_resources = ( + JVM_PACKAGES / "xgboost4j-spark-gpu/src/test/resources" + ) + mkdir(xgboost4j_spark_gpu_resources) + copy_glob("demo/data/veterans_lung_cancer.csv", xgboost4j_spark_gpu_resources) + copy_file( + xgboost4j_spark_resources / "rank.train.csv", + xgboost4j_spark_gpu_resources, + ) - print("copying train/test files", flush=True) - # for xgboost4j - maybe_makedirs("xgboost4j/src/test/resources") - for file in glob.glob("../demo/data/agaricus.*"): - cp(file, "xgboost4j/src/test/resources") - - # for xgboost4j-spark - maybe_makedirs("xgboost4j-spark/src/test/resources") - with cd("../demo/data/regression"): - run(f'"{sys.executable}" mapfeat.py') - run(f'"{sys.executable}" mknfold.py machine.txt 1') - for file in glob.glob("../demo/data/regression/machine.txt.t*"): - cp(file, "xgboost4j-spark/src/test/resources") - for file in glob.glob("../demo/data/agaricus.*"): - cp(file, "xgboost4j-spark/src/test/resources") - - # for xgboost4j-spark-gpu - if cli_args.use_cuda == "ON": - maybe_makedirs("xgboost4j-spark-gpu/src/test/resources") - for file in glob.glob("../demo/data/veterans_lung_cancer.csv"): - cp(file, "xgboost4j-spark-gpu/src/test/resources") - cp("xgboost4j-spark/src/test/resources/rank.train.csv", "xgboost4j-spark-gpu/src/test/resources") +def native_build(options: argparse.Namespace) -> None: + """Build and copy the native JNI library and its test resources.""" + if sys.platform == "darwin": + os.environ["JAVA_HOME"] = ( + subprocess.check_output(["/usr/libexec/java_home"]).strip().decode() + ) + + print("building Java wrapper", flush=True) + build_dir = ROOT / ("build-gpu" if options.use_cuda == "ON" else "build") + mkdir(build_dir) + build(cmake_config(options), build_dir) + + print("copying native library", flush=True) + copy_native_library() + + print("copying train/test files", flush=True) + copy_test_resources(use_cuda=options.use_cuda == "ON") if __name__ == "__main__": @@ -195,5 +219,5 @@ def native_build(cli_args: argparse.Namespace) -> None: parser.add_argument("--use-debug", type=str, choices=["ON", "OFF"], default="OFF") parser.add_argument("--use-nvtx", type=str, choices=["ON", "OFF"], default="OFF") parser.add_argument("--plugin-rmm", type=str, choices=["ON", "OFF"], default="OFF") - cli_args = parser.parse_args() - native_build(cli_args) + parsed_args = parser.parse_args() + native_build(parsed_args) diff --git a/python-package/packager/nativelib.py b/python-package/packager/nativelib.py index 3d714eca5abf..8b4086eec44c 100644 --- a/python-package/packager/nativelib.py +++ b/python-package/packager/nativelib.py @@ -67,6 +67,7 @@ def _build(*, generator: str) -> None: if system() == "Windows": supported_generators = ( + "-GVisual Studio 18 2026", "-GVisual Studio 17 2022", "-GVisual Studio 16 2019", "-GVisual Studio 15 2017", diff --git a/python-package/xgboost/dask/utils.py b/python-package/xgboost/dask/utils.py index e9a004f6e8d1..8f16ee5a0003 100644 --- a/python-package/xgboost/dask/utils.py +++ b/python-package/xgboost/dask/utils.py @@ -17,8 +17,8 @@ def get_n_threads(local_param: Dict[str, Any], worker: "distributed.Worker") -> int: """Get the number of threads from a worker and the user-supplied parameters.""" - # dask worker nthreads, "state" is available in 2022.6.1 - dwnt = worker.state.nthreads if hasattr(worker, "state") else worker.nthreads + # dask worker nthreads + dwnt = worker.state.nthreads n_threads = None for p in ["nthread", "n_jobs"]: if local_param.get(p, None) is not None and local_param.get(p, dwnt) != dwnt: