Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
296 changes: 160 additions & 136 deletions jvm-packages/create_jni.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#!/usr/bin/env python
"""Build the native XGBoost4J JNI library."""

import argparse
import errno
import glob
import os
import platform
import shlex
import shutil
import subprocess
import sys
from contextlib import contextmanager

# Monkey-patch the API inconsistency between Python2.X and 3.X.
if sys.platform.startswith("linux"):
sys.platform = "linux"
from pathlib import Path
from typing import Sequence

ROOT = Path(__file__).resolve().parents[1]
JVM_PACKAGES = Path(__file__).resolve().parent

CONFIG = {
DEFAULT_CONFIG = {
"USE_OPENMP": "ON",
"USE_CUDA": "OFF",
"USE_NCCL": "OFF",
Expand All @@ -24,121 +24,121 @@
}


@contextmanager
def cd(path):
    """Temporarily switch the working directory to ``path``.

    ``path`` is a "/"-separated path; it is converted with ``normpath``
    before use. The normalized path is yielded to the ``with`` body, and the
    previous working directory is restored on exit, even if the body raises.
    """
    path = normpath(path)
    cwd = os.getcwd()
    os.chdir(path)
    print("cd " + path, flush=True)
    try:
        yield path
    finally:
        # Always go back, otherwise later relative paths would resolve wrongly.
        os.chdir(cwd)
def run(command: Sequence[str], *, cwd: Path | None = None) -> None:
"""Run a shell command."""
print(shlex.join(command), flush=True)
subprocess.run(command, cwd=cwd, check=True, env=os.environ)


def maybe_makedirs(path):
    """Create ``path`` and any missing parents; an existing directory is fine.

    ``path`` is a "/"-separated path; it is converted with ``normpath``
    before use.
    """
    path = normpath(path)
    print("mkdir -p " + path, flush=True)
    # exist_ok replaces the manual errno.EEXIST check. Unlike that check it
    # still raises when ``path`` exists but is not a directory.
    os.makedirs(path, exist_ok=True)
def mkdir(path: Path) -> None:
    """Create ``path`` and any missing parents; do nothing if it already exists.

    The equivalent ``mkdir -p`` command is echoed first.
    """
    print(f"mkdir -p {path}", flush=True)
    path.mkdir(parents=True, exist_ok=True)


def run(command, **kwargs):
    """Echo ``command`` and execute it through the shell.

    ``command`` is a single string interpreted by the shell (``shell=True``),
    so callers are responsible for quoting. Extra keyword arguments are
    forwarded to ``subprocess.run``.

    Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
    """
    print(command, flush=True)
    subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs)
def copy_file(source: Path, target: Path) -> None:
    """Copy ``source`` to ``target``.

    ``target`` may be a destination file path or an existing directory; in the
    latter case the file keeps its name. The equivalent ``cp`` command is
    echoed first.
    """
    print(f"cp {source} {target}", flush=True)
    shutil.copy(source, target)


def cp(source, target):
    """Copy ``source`` to ``target`` (a file path or an existing directory).

    Both arguments are "/"-separated paths; they are converted with
    ``normpath`` before use. The equivalent ``cp`` command is echoed first.
    """
    source = normpath(source)
    target = normpath(target)
    print(f"cp {source} {target}", flush=True)
    shutil.copy(source, target)
def copy_glob(pattern: str, target: Path) -> None:
    """Copy every file matching ``pattern`` into ``target``.

    ``pattern`` is a glob expression evaluated relative to ``ROOT``. Nothing
    is copied, and no error is raised, when the pattern matches no file.
    """
    # Path.glob yields matches in arbitrary order; sorting keeps the copy
    # sequence and the echoed log stable between runs.
    for source in sorted(ROOT.glob(pattern)):
        copy_file(source, target)


def normpath(path):
    """Normalize UNIX path to a native path.

    The "/"-separated ``path`` is re-joined with the platform separator.
    Relative paths stay relative; absolute paths are re-rooted at the
    platform's filesystem root.
    """
    normalized = os.path.join(*path.split("/"))
    if os.path.isabs(path):
        # Splitting an absolute path leaves an empty leading component, which
        # os.path.join drops, so the root has to be put back explicitly.
        return os.path.abspath("/") + normalized
    return normalized
def cmake_config(options: argparse.Namespace) -> dict[str, str]:
"""Create CMake configuration from CLI options."""
config = DEFAULT_CONFIG.copy()
config["USE_OPENMP"] = options.use_openmp
config["USE_NVTX"] = options.use_nvtx
config["PLUGIN_RMM"] = options.plugin_rmm

if options.log_capi_invocation == "ON":
config["LOG_CAPI_INVOCATION"] = "ON"
if options.use_debug == "ON":
config["CMAKE_BUILD_TYPE"] = "Debug"
if options.use_cuda == "ON":
config["USE_CUDA"] = "ON"
config["USE_NCCL"] = "ON"
config["USE_DLOPEN_NCCL"] = "OFF"

def native_build(cli_args: argparse.Namespace) -> None:
CONFIG["USE_OPENMP"] = cli_args.use_openmp
if sys.platform == "darwin":
os.environ["JAVA_HOME"] = (
subprocess.check_output("/usr/libexec/java_home").strip().decode()
)
if cli_args.use_debug == "ON":
CONFIG["CMAKE_BUILD_TYPE"] = "Debug"
CONFIG["USE_NVTX"] = cli_args.use_nvtx
CONFIG["PLUGIN_RMM"] = cli_args.plugin_rmm
return config

print("building Java wrapper", flush=True)
with cd(".."):
build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
maybe_makedirs(build_dir)

if sys.platform == "linux":
maybe_parallel_build = " -- -j $(nproc)"
elif sys.platform == "win32":
maybe_parallel_build = ' -- /m /nodeReuse:false "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"'
else:
maybe_parallel_build = ""

if cli_args.log_capi_invocation == "ON":
CONFIG["LOG_CAPI_INVOCATION"] = "ON"

if cli_args.use_cuda == "ON":
CONFIG["USE_CUDA"] = "ON"
CONFIG["USE_NCCL"] = "ON"
CONFIG["USE_DLOPEN_NCCL"] = "OFF"

args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
if sys.platform != "win32":

def cmake_args(config: dict[str, str]) -> list[str]:
    """Create CMake command line arguments.

    Each entry of ``config`` becomes a ``-D`` cache definition, typed ``BOOL``
    when its value is ``"ON"``/``"OFF"`` and ``STRING`` otherwise. Two
    optional arguments may follow, depending on the environment:

    * ``-GNinja`` when not on Windows and a ``ninja`` executable is on PATH;
    * ``-DCMAKE_CUDA_ARCHITECTURES=...`` when ``GPU_ARCH_FLAG`` is set to a
      non-empty value.
    """
    args = [
        f"-D{k}:BOOL={v}" if v in ("ON", "OFF") else f"-D{k}:STRING={v}"
        for k, v in config.items()
    ]

    if sys.platform != "win32" and shutil.which("ninja"):
        args.append("-GNinja")

    # Set GPU_ARCH_FLAG to override the CUDA architectures.
    if gpu_arch_flag := os.getenv("GPU_ARCH_FLAG"):
        args.append(f"-DCMAKE_CUDA_ARCHITECTURES={gpu_arch_flag}")

    return args


def windows_generators() -> tuple[list[str], ...]:
    """Return CMake generator arguments to try on Windows.

    Each item is a list of extra command line arguments. The empty list comes
    first so CMake picks its own default; the explicit Visual Studio
    generators follow from newest to oldest, all targeting x64.
    """
    return (
        [],  # Let CMake decide.
        ["-G", "Visual Studio 18 2026", "-A", "x64"],
        ["-G", "Visual Studio 17 2022", "-A", "x64"],
        ["-G", "Visual Studio 16 2019", "-A", "x64"],
        ["-G", "Visual Studio 15 2017", "-A", "x64"],
    )


def configure(config_args: list[str], build_dir: Path) -> None:
"""Configure the CMake build."""
if sys.platform == "win32":
for generator in windows_generators():
try:
subprocess.check_call(["ninja", "--version"])
args.append("-GNinja")
except FileNotFoundError:
pass

# If the environment sets GPU_ARCH_FLAG, pass it on as the CUDA architectures.
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % gpu_arch_flag)

with cd(build_dir):
lib_dir = os.path.join(os.pardir, "lib")
if os.path.exists(lib_dir):
shutil.rmtree(lib_dir)

# Same trick as Python build, just test all possible generators.
if sys.platform == "win32":
supported_generators = (
"", # empty, decided by cmake
'-G"Visual Studio 17 2022" -A x64',
'-G"Visual Studio 16 2019" -A x64',
'-G"Visual Studio 15 2017" -A x64',
run(["cmake", str(ROOT), *config_args, *generator], cwd=build_dir)
return
except subprocess.CalledProcessError as err:
print(
f"Failed to build with generator: {shlex.join(generator)}",
err,
flush=True,
)
for generator in supported_generators:
try:
run("cmake .. " + " ".join(args + [generator]))
break
except subprocess.CalledProcessError as e:
print(f"Failed to build with generator: {generator}", e, flush=True)
with cd(os.path.pardir):
shutil.rmtree(build_dir)
maybe_makedirs(build_dir)
else:
run("cmake .. " + " ".join(args))
run("cmake --build . --config Release" + maybe_parallel_build)
shutil.rmtree(build_dir)
mkdir(build_dir)
raise RuntimeError("None of the supported CMake generators worked.")

run(["cmake", str(ROOT), *config_args], cwd=build_dir)


def build(config: dict[str, str], build_dir: Path) -> None:
    """Build the native library.

    Removes ``ROOT / "lib"`` if present, configures CMake in ``build_dir``
    from ``config``, then runs a Release build there.

    Parameters
    ----------
    config :
        CMake cache entries, as accepted by ``cmake_args``.
    build_dir :
        Directory in which CMake is configured and built.
    """
    # Presumably cleared so that a library left over from an earlier build
    # cannot be mistaken for the output of this one.
    if (lib_dir := ROOT / "lib").exists():
        shutil.rmtree(lib_dir)

    configure(cmake_args(config), build_dir)

    build_args = ["cmake", "--build", ".", "--config", "Release"]
    # Everything after "--" is handed to the underlying native build tool.
    if sys.platform == "linux":
        build_args.extend(["--", "-j", str(os.cpu_count() or 1)])
    elif sys.platform == "win32":
        build_args.extend(
            [
                "--",
                "/m",
                "/nodeReuse:false",
                "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal",
            ]
        )
    run(build_args, cwd=build_dir)


print("copying native library", flush=True)
def copy_native_library() -> None:
"""Copy the native library into the JVM package resources."""
library_name, os_folder = {
"Windows": ("xgboost4j.dll", "windows"),
"Darwin": ("libxgboost4j.dylib", "macos"),
Expand All @@ -154,35 +154,59 @@ def native_build(cli_args: argparse.Namespace) -> None:
"arm64": "aarch64", # on macOS & Windows ARM 64-bit
"aarch64": "aarch64",
}[platform.machine().lower()]
output_folder = "xgboost4j/src/main/resources/lib/{}/{}".format(
os_folder, arch_folder

output_folder = (
JVM_PACKAGES / "xgboost4j/src/main/resources/lib" / os_folder / arch_folder
)
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
mkdir(output_folder)
copy_file(ROOT / "lib" / library_name, output_folder)


def copy_test_resources(*, use_cuda: bool) -> None:
    """Copy training data used by JVM package tests.

    Fills the ``src/test/resources`` directories of ``xgboost4j`` and
    ``xgboost4j-spark``, and of ``xgboost4j-spark-gpu`` when ``use_cuda`` is
    true, with data taken from ``demo/data`` under ``ROOT``.
    """
    xgboost4j_resources = JVM_PACKAGES / "xgboost4j/src/test/resources"
    mkdir(xgboost4j_resources)
    copy_glob("demo/data/agaricus.*", xgboost4j_resources)

    xgboost4j_spark_resources = JVM_PACKAGES / "xgboost4j-spark/src/test/resources"
    mkdir(xgboost4j_spark_resources)

    # NOTE(review): these two scripts presumably generate the machine.txt.t*
    # files copied right below -- confirm against demo/data/regression.
    regression_dir = ROOT / "demo/data/regression"
    run([sys.executable, "mapfeat.py"], cwd=regression_dir)
    run([sys.executable, "mknfold.py", "machine.txt", "1"], cwd=regression_dir)

    copy_glob("demo/data/regression/machine.txt.t*", xgboost4j_spark_resources)
    copy_glob("demo/data/agaricus.*", xgboost4j_spark_resources)

    if use_cuda:
        xgboost4j_spark_gpu_resources = (
            JVM_PACKAGES / "xgboost4j-spark-gpu/src/test/resources"
        )
        mkdir(xgboost4j_spark_gpu_resources)
        copy_glob("demo/data/veterans_lung_cancer.csv", xgboost4j_spark_gpu_resources)
        # rank.train.csv is taken from the Spark test resources directory,
        # not from demo/data.
        copy_file(
            xgboost4j_spark_resources / "rank.train.csv",
            xgboost4j_spark_gpu_resources,
        )

print("copying train/test files", flush=True)

# for xgboost4j
maybe_makedirs("xgboost4j/src/test/resources")
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "xgboost4j/src/test/resources")

# for xgboost4j-spark
maybe_makedirs("xgboost4j-spark/src/test/resources")
with cd("../demo/data/regression"):
run(f'"{sys.executable}" mapfeat.py')
run(f'"{sys.executable}" mknfold.py machine.txt 1')
for file in glob.glob("../demo/data/regression/machine.txt.t*"):
cp(file, "xgboost4j-spark/src/test/resources")
for file in glob.glob("../demo/data/agaricus.*"):
cp(file, "xgboost4j-spark/src/test/resources")

# for xgboost4j-spark-gpu
if cli_args.use_cuda == "ON":
maybe_makedirs("xgboost4j-spark-gpu/src/test/resources")
for file in glob.glob("../demo/data/veterans_lung_cancer.csv"):
cp(file, "xgboost4j-spark-gpu/src/test/resources")
cp("xgboost4j-spark/src/test/resources/rank.train.csv", "xgboost4j-spark-gpu/src/test/resources")
def native_build(options: argparse.Namespace) -> None:
    """Build and copy the native JNI library and its test resources.

    ``options`` is the parsed command line; ``options.use_cuda`` (``"ON"`` or
    ``"OFF"``) selects the build directory (``build-gpu`` or ``build`` under
    ``ROOT``) and whether GPU test resources are copied.
    """
    if sys.platform == "darwin":
        # On macOS, set JAVA_HOME from the output of /usr/libexec/java_home.
        os.environ["JAVA_HOME"] = (
            subprocess.check_output(["/usr/libexec/java_home"]).strip().decode()
        )

    print("building Java wrapper", flush=True)
    build_dir = ROOT / ("build-gpu" if options.use_cuda == "ON" else "build")
    mkdir(build_dir)
    build(cmake_config(options), build_dir)

    print("copying native library", flush=True)
    copy_native_library()

    print("copying train/test files", flush=True)
    copy_test_resources(use_cuda=options.use_cuda == "ON")


if __name__ == "__main__":
Expand All @@ -195,5 +219,5 @@ def native_build(cli_args: argparse.Namespace) -> None:
parser.add_argument("--use-debug", type=str, choices=["ON", "OFF"], default="OFF")
parser.add_argument("--use-nvtx", type=str, choices=["ON", "OFF"], default="OFF")
parser.add_argument("--plugin-rmm", type=str, choices=["ON", "OFF"], default="OFF")
cli_args = parser.parse_args()
native_build(cli_args)
parsed_args = parser.parse_args()
native_build(parsed_args)
1 change: 1 addition & 0 deletions python-package/packager/nativelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def _build(*, generator: str) -> None:

if system() == "Windows":
supported_generators = (
"-GVisual Studio 18 2026",
"-GVisual Studio 17 2022",
"-GVisual Studio 16 2019",
"-GVisual Studio 15 2017",
Expand Down
4 changes: 2 additions & 2 deletions python-package/xgboost/dask/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

def get_n_threads(local_param: Dict[str, Any], worker: "distributed.Worker") -> int:
"""Get the number of threads from a worker and the user-supplied parameters."""
# dask worker nthreads, "state" is available in 2022.6.1
dwnt = worker.state.nthreads if hasattr(worker, "state") else worker.nthreads
# dask worker nthreads
dwnt = worker.state.nthreads
n_threads = None
for p in ["nthread", "n_jobs"]:
if local_param.get(p, None) is not None and local_param.get(p, dwnt) != dwnt:
Expand Down
Loading