From a946a3acf583d12fa76685dd34bfe2a1b2885b96 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Thu, 23 Apr 2026 17:59:37 +0200 Subject: [PATCH 01/11] fix(ci): make bootstrap deterministic and align CI installs - Change package.json bootstrap to use 'npm ci --prefix packages/hooklib' - Change k8s-tests workflow to use 'npm ci' instead of 'npm install' - All three packages (hooklib, k8s, docker) now use deterministic installs - All three CI jobs (format-and-lint, docker-tests, k8s-tests) now use npm ci --- .github/workflows/build.yaml | 8 +++++++- package.json | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bc125b9b..7a3e1ec9 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -17,6 +17,8 @@ jobs: with: node-version: 24 cache: npm + - run: node --version && npm --version + name: Log Node and npm versions - run: npm ci name: Install dependencies - run: npm run bootstrap @@ -40,6 +42,8 @@ jobs: with: node-version: 24 cache: npm + - run: node --version && npm --version + name: Log Node and npm versions - run: npm ci name: Install dependencies - run: npm run bootstrap @@ -64,7 +68,9 @@ jobs: - uses: helm/kind-action@v1.12.0 with: config: packages/k8s/tests/test-kind.yaml - - run: npm install + - run: node --version && npm --version + name: Log Node and npm versions + - run: npm ci name: Install dependencies - run: npm run bootstrap name: Bootstrap the packages diff --git a/package.json b/package.json index 21743855..ed388dea 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ }, "scripts": { "test": "npm run test --prefix packages/docker && npm run test --prefix packages/k8s", - "bootstrap": "npm install --prefix packages/hooklib && npm ci --prefix packages/k8s && npm ci --prefix packages/docker", + "bootstrap": "npm ci --prefix packages/hooklib && npm ci --prefix packages/k8s && npm ci --prefix packages/docker", "format": "prettier --write '**/*.ts'", "format-check": "prettier --check '**/*.ts'", "lint": "eslint packages/**/*.ts", From 3133f05199f81bd049219a9034d8b9a13641c946 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Tue, 21 Apr 2026 14:02:21 +0200 Subject: [PATCH 02/11] Ensure read write many first --- docs/adrs/0135-rwx-volume-strategy.md | 55 ++ packages/k8s/README.md | 21 + packages/k8s/src/hooks/prepare-job.ts | 77 +-- packages/k8s/src/hooks/run-container-step.ts | 177 +++--- packages/k8s/src/hooks/run-script-step.ts | 81 +-- packages/k8s/src/k8s/index.ts | 543 +++++-------------- packages/k8s/src/k8s/utils.ts | 155 +++++- packages/k8s/tests/rwo-affinity-test.ts | 143 +++++ packages/k8s/tests/rwx-contract-demo-test.ts | 28 + packages/k8s/tests/rwx-volume-test.ts | 119 ++++ packages/k8s/tests/test-setup.ts | 142 +++++ 11 files changed, 891 insertions(+), 650 deletions(-) create mode 100644 docs/adrs/0135-rwx-volume-strategy.md create mode 100644 packages/k8s/tests/rwo-affinity-test.ts create mode 100644 packages/k8s/tests/rwx-contract-demo-test.ts create mode 100644 packages/k8s/tests/rwx-volume-test.ts diff --git a/docs/adrs/0135-rwx-volume-strategy.md b/docs/adrs/0135-rwx-volume-strategy.md new file mode 100644 index 00000000..646e7092 --- /dev/null +++ b/docs/adrs/0135-rwx-volume-strategy.md @@ -0,0 +1,55 @@ +# ADR 0135: RWX volume strategy and RWO affinity fallback + +**Date:** 22 April 2026 + +**Status**: Accepted + +## Context + +The Kubernetes hook implementation for GitHub Actions runners requires access to the runner's working directory (`_work`) within the dynamically created job pods. This shared access is typically managed via Persistent Volume Claims (PVCs). + +Regardless of the storage strategy, job pods are always constrained to run on the same node as the runner pod to ensure consistent access to the local environment and state. The choice of volume access mode determines operational flexibility and multi-pod access capability rather than pod placement. + +Depending on the storage provider and cluster configuration, operators may choose between `ReadWriteMany` (RWX) or `ReadWriteOnce` (RWO) access modes. RWX is preferred because it allows multiple pods to access the volume simultaneously, providing greater operational flexibility for future scaling or monitoring scenarios. RWO restricts volume access to a single pod at a time, locking the volume to that pod's specific node. + +## Decision + +We have decided to establish `ReadWriteMany` (RWX) as the preferred storage strategy for the Kubernetes hook. While job pods remain pinned to the runner's node, RWX provides superior operational flexibility by allowing multiple pods (such as sidecars or auxiliary tools) to access the same volume without storage-imposed locking constraints. + +For environments where RWX is unavailable or undesirable, we support a `ReadWriteOnce` (RWO) fallback strategy. This fallback is implemented using node affinity to ensure that job pods are scheduled onto the same node as the runner pod that holds the RWO volume. + +### Operational Guidance + +1. **Preferred Model (RWX):** Operators should configure the runner with a PVC supporting `ReadWriteMany`. +2. **Fallback Model (RWO):** If using `ReadWriteOnce`, operators must enable the Kubernetes scheduler integration by setting `ACTIONS_RUNNER_USE_KUBE_SCHEDULER=true`. +3. **Node Selection:** When scheduler integration is enabled, the hook applies a `requiredDuringSchedulingIgnoredDuringExecution` node affinity targeting the runner's current node (`kubernetes.io/hostname`). +4. **Implementation Details:** + - The hook determines the node name via `getCurrentNodeName()` and applies affinity in `packages/k8s/src/k8s/index.ts` (lines 101, 165). + - The scheduler behavior is toggled by the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable, as defined in `packages/k8s/src/k8s/utils.ts` (line 16). + - The PVC claim name defaults to `${ACTIONS_RUNNER_POD_NAME}-work` unless overridden by `ACTIONS_RUNNER_CLAIM_NAME` (`packages/k8s/src/hooks/constants.ts`, lines 27-33). + +### Non-Recommendations + +We explicitly do **not** recommend the use of `spec.nodeName` for operator-driven scheduling. While the hook uses `nodeName` as a legacy fallback when `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` is not set to `true` (`packages/k8s/src/k8s/index.ts`, lines 103, 167), this bypasses the Kubernetes scheduler and can lead to scheduling failures or resource imbalances. Operators should always prefer the affinity-based approach for RWO volumes. + +## Alternatives + +- **nodeName Bypass:** Directly setting `nodeName` bypasses the scheduler entirely. This was rejected as a recommendation because it prevents the scheduler from accounting for taints, tolerations, and resource pressure. +- **Local Volumes:** Using local volumes tied to specific nodes. This is a subset of the RWO fallback and is supported via the affinity mechanism. + +## Consequences + +- **Flexibility:** RWX users benefit from the ability to have multiple pods access the volume simultaneously, simplifying future operational extensions. +- **Node Coupling:** All users are coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node to maintain workspace integrity. +- **Configuration:** Operators must be aware of the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` toggle when moving from RWX to RWO. This toggle controls whether the hook uses `nodeName` (bypassing the scheduler) or node affinity (using the scheduler) to pin the pod to the runner's node. + +## Migration Guidance + +Operators migrating from an RWO setup that relied on default `nodeName` behavior to a more robust affinity-based setup should: +1. Ensure the runner pod has the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable set to `true`. +2. Verify that the runner's ServiceAccount has the necessary permissions to list pods (to determine its own node). + +## Non-Goals + +- This ADR does not recommend `nodeName` as a primary or secondary configuration path for operators. +- This ADR does not dictate specific storage providers (e.g., EBS vs. EFS vs. Azure Files), but rather the access mode strategy. diff --git a/packages/k8s/README.md b/packages/k8s/README.md index ecc893b8..23d92c79 100644 --- a/packages/k8s/README.md +++ b/packages/k8s/README.md @@ -30,6 +30,27 @@ rules: - The `ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER` env should be set to true to prevent the runner from running any jobs outside of a container - The runner pod should map a persistent volume claim into the `_work` directory - The `ACTIONS_RUNNER_CLAIM_NAME` env should be set to the persistent volume claim that contains the runner's working directory, otherwise it defaults to `${ACTIONS_RUNNER_POD_NAME}-work` +- The `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` env can be set to `true` to enable the Kubernetes scheduler for job pods. When set to `true`, the hook uses `nodeAffinity` to ensure job pods are scheduled correctly (essential for `ReadWriteOnce` volumes). If not set, the hook defaults to a legacy mode where job pods are pinned to the same node as the runner pod using `nodeName`. + +## Storage Guidance +The K8s hooks require a shared volume between the runner pod and the job pods to share the workspace and other internal directories. + +### RWX (Recommended) +The preferred way to configure storage is using a `ReadWriteMany` (RWX) Persistent Volume Claim. While job pods are always pinned to the runner's node, RWX provides better operational flexibility by allowing multiple pods to access the same workspace simultaneously. + +To migrate from RWO to RWX: +1. Provision a new `ReadWriteMany` StorageClass if one is not available. +2. Update your PVC definition to use `accessModes: [ReadWriteMany]`. +3. Set `ACTIONS_RUNNER_USE_KUBE_SCHEDULER=true` to enable the scheduler-based node pinning (via affinity) instead of the default `nodeName` pinning. + +### RWO Fallback (Affinity-based) +If `ReadWriteMany` storage is not available, you can use `ReadWriteOnce` (RWO) storage. In this mode, all job pods must be scheduled on the same node as the runner pod that owns the PVC. + +To enable this safely: +1. Ensure `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` is set to `true`. +2. The hooks will automatically add a `nodeAffinity` to the job pods, ensuring they are scheduled on the same node as the runner pod (`kubernetes.io/hostname` match). + +> **Note:** We do not recommend manually setting `nodeName` in the pod template, as the hooks handle node placement automatically via affinity when the scheduler is enabled. - Some actions runner env's are expected to be set. These are set automatically by the runner. - `RUNNER_WORKSPACE` is expected to be set to the workspace of the runner - `GITHUB_WORKSPACE` is expected to be set to the workspace of the job diff --git a/packages/k8s/src/hooks/prepare-job.ts b/packages/k8s/src/hooks/prepare-job.ts index 28453c17..cce677c3 100644 --- a/packages/k8s/src/hooks/prepare-job.ts +++ b/packages/k8s/src/hooks/prepare-job.ts @@ -1,4 +1,5 @@ import * as core from '@actions/core' +import * as io from '@actions/io' import * as k8s from '@kubernetes/client-node' import { JobContainerInfo, @@ -7,33 +8,26 @@ import { writeToResponseFile, ServiceContainerInfo } from 'hooklib' +import path from 'path' import { containerPorts, - createJobPod, + createPod, isPodContainerAlpine, prunePods, waitForPodPhases, - getPrepareJobTimeoutSeconds, - execCpToPod, - execPodStep + getPrepareJobTimeoutSeconds } from '../k8s' import { - CONTAINER_VOLUMES, + containerVolumes, DEFAULT_CONTAINER_ENTRY_POINT, DEFAULT_CONTAINER_ENTRY_POINT_ARGS, generateContainerName, mergeContainerWithOptions, readExtensionFromFile, PodPhase, - fixArgs, - prepareJobScript + fixArgs } from '../k8s/utils' -import { - CONTAINER_EXTENSION_PREFIX, - getJobPodName, - JOB_CONTAINER_NAME -} from './constants' -import { dirname } from 'path' +import { CONTAINER_EXTENSION_PREFIX, JOB_CONTAINER_NAME } from './constants' export async function prepareJob( args: PrepareJobArgs, @@ -46,6 +40,7 @@ export async function prepareJob( await prunePods() const extension = readExtensionFromFile() + await copyExternalsToRoot() let container: k8s.V1Container | undefined = undefined if (args.container?.image) { @@ -75,8 +70,7 @@ export async function prepareJob( let createdPod: k8s.V1Pod | undefined = undefined try { - createdPod = await createJobPod( - getJobPodName(), + createdPod = await createPod( container, services, args.container.registry, @@ -96,13 +90,6 @@ export async function prepareJob( `Job pod created, waiting for it to come online ${createdPod?.metadata?.name}` ) - const runnerWorkspace = dirname(process.env.RUNNER_WORKSPACE as string) - - let prepareScript: { containerPath: string; runnerPath: string } | undefined - if (args.container?.userMountVolumes?.length) { - prepareScript = prepareJobScript(args.container.userMountVolumes || []) - } - try { await waitForPodPhases( createdPod.metadata.name, @@ -115,28 +102,6 @@ export async function prepareJob( throw new Error(`pod failed to come online with error: ${err}`) } - await execCpToPod(createdPod.metadata.name, runnerWorkspace, '/__w') - - if (prepareScript) { - await execPodStep( - ['sh', '-e', prepareScript.containerPath], - createdPod.metadata.name, - JOB_CONTAINER_NAME - ) - - const promises: Promise[] = [] - for (const vol of args?.container?.userMountVolumes || []) { - promises.push( - execCpToPod( - createdPod.metadata.name, - vol.sourceVolumePath, - vol.targetVolumePath - ) - ) - } - await Promise.all(promises) - } - core.debug('Job pod is ready for traffic') let isAlpine = false @@ -180,8 +145,10 @@ function generateResponseFile( const mainContainerContextPorts: ContextPorts = {} if (mainContainer?.ports) { for (const port of mainContainer.ports) { - mainContainerContextPorts[port.containerPort] = - mainContainerContextPorts.hostPort + if (port.containerPort && port.hostPort) { + mainContainerContextPorts[port.containerPort.toString()] = + port.hostPort.toString() + } } } @@ -217,6 +184,17 @@ function generateResponseFile( writeToResponseFile(responseFile, JSON.stringify(response)) } +async function copyExternalsToRoot(): Promise { + const workspace = process.env['RUNNER_WORKSPACE'] + if (workspace) { + await io.cp( + path.join(workspace, '../../externals'), + path.join(workspace, '../externals'), + { force: true, recursive: true, copySourceDirectory: false } + ) + } +} + export function createContainerSpec( container: JobContainerInfo | ServiceContainerInfo, name: string, @@ -250,7 +228,7 @@ export function createContainerSpec( container['environmentVariables'] || {} )) { if (value && key !== 'HOME') { - podContainer.env.push({ name: key, value }) + podContainer.env.push({ name: key, value: value as string }) } } @@ -266,7 +244,10 @@ export function createContainerSpec( }) } - podContainer.volumeMounts = CONTAINER_VOLUMES + podContainer.volumeMounts = containerVolumes( + container['userMountVolumes'], + jobContainer + ) if (!extension) { return podContainer diff --git a/packages/k8s/src/hooks/run-container-step.ts b/packages/k8s/src/hooks/run-container-step.ts index 1786a38a..07e0ac32 100644 --- a/packages/k8s/src/hooks/run-container-step.ts +++ b/packages/k8s/src/hooks/run-container-step.ts @@ -1,31 +1,23 @@ import * as core from '@actions/core' -import * as fs from 'fs' import * as k8s from '@kubernetes/client-node' import { RunContainerStepArgs } from 'hooklib' -import { dirname } from 'path' import { - createContainerStepPod, - deletePod, - execCpFromPod, - execCpToPod, - execPodStep, - getPrepareJobTimeoutSeconds, + createJob, + createSecretForEnvs, + getContainerJobPodName, + getPodLogs, + getPodStatus, + waitForJobToComplete, waitForPodPhases } from '../k8s' import { - CONTAINER_VOLUMES, + containerVolumes, + fixArgs, mergeContainerWithOptions, PodPhase, - readExtensionFromFile, - DEFAULT_CONTAINER_ENTRY_POINT_ARGS, - writeContainerStepScript + readExtensionFromFile } from '../k8s/utils' -import { - getJobPodName, - getStepPodName, - JOB_CONTAINER_EXTENSION_NAME, - JOB_CONTAINER_NAME -} from './constants' +import { JOB_CONTAINER_EXTENSION_NAME, JOB_CONTAINER_NAME } from './constants' export async function runContainerStep( stepContainer: RunContainerStepArgs @@ -34,109 +26,118 @@ export async function runContainerStep( throw new Error('Building container actions is not currently supported') } - if (!stepContainer.entryPoint) { - throw new Error( - 'failed to start the container since the entrypoint is overwritten' - ) - } - - const envs = stepContainer.environmentVariables || {} - envs['GITHUB_ACTIONS'] = 'true' - if (!('CI' in envs)) { - envs.CI = 'true' + let secretName: string | undefined = undefined + if (stepContainer.environmentVariables) { + try { + const envs = JSON.parse( + JSON.stringify(stepContainer.environmentVariables) + ) + envs['GITHUB_ACTIONS'] = 'true' + if (!('CI' in envs)) { + envs.CI = 'true' + } + secretName = await createSecretForEnvs(envs) + } catch (err) { + core.debug(`createSecretForEnvs failed: ${JSON.stringify(err)}`) + const message = (err as any)?.response?.body?.message || err + throw new Error(`failed to create script environment: ${message}`) + } } const extension = readExtensionFromFile() - const container = createContainerSpec(stepContainer, extension) + core.debug(`Created secret ${secretName} for container job envs`) + const container = createContainerSpec(stepContainer, secretName, extension) - let pod: k8s.V1Pod + let job: k8s.V1Job try { - pod = await createContainerStepPod(getStepPodName(), container, extension) + job = await createJob(container, extension) } catch (err) { core.debug(`createJob failed: ${JSON.stringify(err)}`) const message = (err as any)?.response?.body?.message || err throw new Error(`failed to run script step: ${message}`) } - if (!pod.metadata?.name) { + if (!job.metadata?.name) { throw new Error( `Expected job ${JSON.stringify( - pod + job )} to have correctly set the metadata.name` ) } - const podName = pod.metadata.name + core.debug(`Job created, waiting for pod to start: ${job.metadata?.name}`) + let podName: string try { - await waitForPodPhases( - podName, - new Set([PodPhase.RUNNING]), - new Set([PodPhase.PENDING, PodPhase.UNKNOWN]), - getPrepareJobTimeoutSeconds() - ) - - const runnerWorkspace = dirname(process.env.RUNNER_WORKSPACE as string) - const githubWorkspace = process.env.GITHUB_WORKSPACE as string - const parts = githubWorkspace.split('/').slice(-2) - if (parts.length !== 2) { - throw new Error(`Invalid github workspace directory: ${githubWorkspace}`) - } - const relativeWorkspace = parts.join('/') + podName = await getContainerJobPodName(job.metadata.name) + } catch (err) { + core.debug(`getContainerJobPodName failed: ${JSON.stringify(err)}`) + const message = (err as any)?.response?.body?.message || err + throw new Error(`failed to get container job pod name: ${message}`) + } - core.debug( - `Copying files from pod ${getJobPodName()} to ${runnerWorkspace}/${relativeWorkspace}` - ) - await execCpFromPod(getJobPodName(), `/__w`, `${runnerWorkspace}`) - - const { containerPath, runnerPath } = writeContainerStepScript( - `${runnerWorkspace}/__w/_temp`, - githubWorkspace, - stepContainer.entryPoint, - stepContainer.entryPointArgs, - envs - ) + await waitForPodPhases( + podName, + new Set([ + PodPhase.COMPLETED, + PodPhase.RUNNING, + PodPhase.SUCCEEDED, + PodPhase.FAILED + ]), + new Set([PodPhase.PENDING, PodPhase.UNKNOWN]) + ) + core.debug('Container step is running or complete, pulling logs') - await execCpToPod(podName, `${runnerWorkspace}/__w`, '/__w') + await getPodLogs(podName, JOB_CONTAINER_NAME) - fs.rmSync(`${runnerWorkspace}/__w`, { recursive: true, force: true }) + core.debug('Waiting for container job to complete') + await waitForJobToComplete(job.metadata.name) - try { - core.debug(`Executing container step script in pod ${podName}`) - return await execPodStep( - ['sh', '-e', containerPath], - pod.metadata.name, - JOB_CONTAINER_NAME - ) - } catch (err) { - core.debug(`execPodStep failed: ${JSON.stringify(err)}`) - const message = (err as any)?.response?.body?.message || err - throw new Error(`failed to run script step: ${message}`) - } finally { - fs.rmSync(runnerPath, { force: true }) - } - } catch (error) { - core.error(`Failed to run container step: ${error}`) - throw error - } finally { - await deletePod(podName).catch(err => { - core.error(`Failed to delete step pod ${podName}: ${err}`) - }) + const status = await getPodStatus(podName) + if (status?.phase === 'Succeeded') { + return 0 } + if (!status?.containerStatuses?.length) { + core.error( + `Can't determine container status from response: ${JSON.stringify( + status + )}` + ) + return 1 + } + const exitCode = + status.containerStatuses[status.containerStatuses.length - 1].state + ?.terminated?.exitCode + return Number(exitCode) || 1 } function createContainerSpec( container: RunContainerStepArgs, + secretName?: string, extension?: k8s.V1PodTemplateSpec ): k8s.V1Container { const podContainer = new k8s.V1Container() podContainer.name = JOB_CONTAINER_NAME podContainer.image = container.image - podContainer.workingDir = '/__w' - podContainer.command = ['tail'] - podContainer.args = DEFAULT_CONTAINER_ENTRY_POINT_ARGS - - podContainer.volumeMounts = CONTAINER_VOLUMES + podContainer.workingDir = container.workingDirectory + podContainer.command = container.entryPoint + ? [container.entryPoint] + : undefined + podContainer.args = container.entryPointArgs?.length + ? fixArgs(container.entryPointArgs) + : undefined + + if (secretName) { + podContainer.envFrom = [ + { + secretRef: { + name: secretName, + optional: false + } + } + ] + } + podContainer.volumeMounts = containerVolumes(undefined, false, true) if (!extension) { return podContainer diff --git a/packages/k8s/src/hooks/run-script-step.ts b/packages/k8s/src/hooks/run-script-step.ts index 1db2bb2e..7db0514d 100644 --- a/packages/k8s/src/hooks/run-script-step.ts +++ b/packages/k8s/src/hooks/run-script-step.ts @@ -2,19 +2,17 @@ import * as fs from 'fs' import * as core from '@actions/core' import { RunScriptStepArgs } from 'hooklib' -import { execCpFromPod, execCpToPod, execPodStep } from '../k8s' -import { writeRunScript, sleep, listDirAllCommand } from '../k8s/utils' +import { execPodStep } from '../k8s' +import { writeEntryPointScript } from '../k8s/utils' import { JOB_CONTAINER_NAME } from './constants' -import { dirname } from 'path' -import * as shlex from 'shlex' export async function runScriptStep( args: RunScriptStepArgs, - state + state, + responseFile? ): Promise { - // Write the entrypoint first. This will be later coppied to the workflow pod const { entryPoint, entryPointArgs, environmentVariables } = args - const { containerPath, runnerPath } = writeRunScript( + const { containerPath, runnerPath } = writeEntryPointScript( args.workingDirectory, entryPoint, entryPointArgs, @@ -22,56 +20,6 @@ export async function runScriptStep( environmentVariables ) - const workdir = dirname(process.env.RUNNER_WORKSPACE as string) - const runnerTemp = `${workdir}/_temp` - const containerTemp = '/__w/_temp' - const containerTempSrc = '/__w/_temp_pre' - // Ensure base and staging dirs exist before copying - await execPodStep( - [ - 'sh', - '-c', - 'mkdir -p /__w && mkdir -p /__w/_temp && mkdir -p /__w/_temp_pre' - ], - state.jobPod, - JOB_CONTAINER_NAME - ) - await execCpToPod(state.jobPod, runnerTemp, containerTempSrc) - - // Copy GitHub directories from temp to /github - // Merge strategy: - // - Overwrite files in _runner_file_commands - // - Append files not already present elsewhere - const mergeCommands = [ - 'set -e', - 'mkdir -p /__w/_temp /__w/_temp_pre', - 'SRC=/__w/_temp_pre', - 'DST=/__w/_temp', - // Overwrite _runner_file_commands - 'cp -a "$SRC/_runner_file_commands/." "$DST/_runner_file_commands"', - `find "$SRC" -type f ! -path "*/_runner_file_commands/*" -exec sh -c ' - rel="\${1#$2/}" - target="$3/$rel" - mkdir -p "$(dirname "$target")" - cp -a "$1" "$target" - ' _ {} "$SRC" "$DST" \\;`, - // Remove _temp_pre after merging - 'rm -rf /__w/_temp_pre' - ] - - try { - await execPodStep( - ['sh', '-c', mergeCommands.join(' && ')], - state.jobPod, - JOB_CONTAINER_NAME - ) - } catch (err) { - core.debug(`Failed to merge temp directories: ${JSON.stringify(err)}`) - const message = (err as any)?.response?.body?.message || err - throw new Error(`failed to merge temp dirs: ${message}`) - } - - // Execute the entrypoint script args.entryPoint = 'sh' args.entryPointArgs = ['-e', containerPath] try { @@ -85,23 +33,6 @@ export async function runScriptStep( const message = (err as any)?.response?.body?.message || err throw new Error(`failed to run script step: ${message}`) } finally { - try { - fs.rmSync(runnerPath, { force: true }) - } catch (removeErr) { - core.debug(`Failed to remove file ${runnerPath}: ${removeErr}`) - } - } - - try { - core.debug( - `Copying from job pod '${state.jobPod}' ${containerTemp} to ${runnerTemp}` - ) - await execCpFromPod( - state.jobPod, - `${containerTemp}/_runner_file_commands`, - `${workdir}/_temp` - ) - } catch (error) { - core.warning('Failed to copy _temp from pod') + fs.rmSync(runnerPath) } } diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ae773da3..40ab0882 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -1,14 +1,13 @@ import * as core from '@actions/core' -import * as path from 'path' -import { spawn } from 'child_process' import * as k8s from '@kubernetes/client-node' -import tar from 'tar-fs' import * as stream from 'stream' -import { WritableStreamBuffer } from 'stream-buffers' -import { createHash } from 'crypto' import type { ContainerInfo, Registry } from 'hooklib' import { + getJobPodName, + getRunnerPodName, getSecretName, + getStepPodName, + getVolumeClaimName, JOB_CONTAINER_NAME, RunnerInstanceLabel } from '../hooks/constants' @@ -16,14 +15,9 @@ import { PodPhase, mergePodSpecWithOptions, mergeObjectMeta, - fixArgs, - listDirAllCommand, - sleep, - EXTERNALS_VOLUME_NAME, - GITHUB_VOLUME_NAME, - WORK_VOLUME + useKubeScheduler, + fixArgs } from './utils' -import * as shlex from 'shlex' const kc = new k8s.KubeConfig() @@ -35,6 +29,8 @@ const k8sAuthorizationV1Api = kc.makeApiClient(k8s.AuthorizationV1Api) const DEFAULT_WAIT_FOR_POD_TIME_SECONDS = 10 * 60 // 10 min +export const POD_VOLUME_NAME = 'work' + export const requiredPermissions = [ { group: '', @@ -54,6 +50,12 @@ export const requiredPermissions = [ resource: 'pods', subresource: 'log' }, + { + group: 'batch', + verbs: ['get', 'list', 'create', 'delete'], + resource: 'jobs', + subresource: '' + }, { group: '', verbs: ['create', 'delete', 'get', 'list'], @@ -62,8 +64,7 @@ export const requiredPermissions = [ } ] -export async function createJobPod( - name: string, +export async function createPod( jobContainer?: k8s.V1Container, services?: k8s.V1Container[], registry?: Registry, @@ -83,7 +84,7 @@ export async function createJobPod( appPod.kind = 'Pod' appPod.metadata = new k8s.V1ObjectMeta() - appPod.metadata.name = name + appPod.metadata.name = getJobPodName() const instanceLabel = new RunnerInstanceLabel() appPod.metadata.labels = { @@ -93,68 +94,19 @@ export async function createJobPod( appPod.spec = new k8s.V1PodSpec() appPod.spec.containers = containers - appPod.spec.securityContext = { - fsGroup: 1001 - } - - // Extract working directory from GITHUB_WORKSPACE - // GITHUB_WORKSPACE is like /__w/repo-name/repo-name - const githubWorkspace = process.env.GITHUB_WORKSPACE - const workingDirPath = githubWorkspace?.split('/').slice(-2).join('/') ?? '' - - const initCommands = [ - 'mkdir -p /mnt/externals', - 'mkdir -p /mnt/work', - 'mkdir -p /mnt/github', - 'mv /home/runner/externals/* /mnt/externals/' - ] - - if (workingDirPath) { - initCommands.push(`mkdir -p /mnt/work/${workingDirPath}`) - } - - appPod.spec.initContainers = [ - { - name: 'fs-init', - image: - process.env.ACTIONS_RUNNER_IMAGE || - 'ghcr.io/actions/actions-runner:latest', - command: ['sh', '-c', initCommands.join(' && ')], - securityContext: { - runAsGroup: 1001, - runAsUser: 1001 - }, - volumeMounts: [ - { - name: EXTERNALS_VOLUME_NAME, - mountPath: '/mnt/externals' - }, - { - name: WORK_VOLUME, - mountPath: '/mnt/work' - }, - { - name: GITHUB_VOLUME_NAME, - mountPath: '/mnt/github' - } - ] - } - ] - appPod.spec.restartPolicy = 'Never' + const nodeName = await getCurrentNodeName() + if (useKubeScheduler()) { + appPod.spec.affinity = await getPodAffinity(nodeName) + } else { + appPod.spec.nodeName = nodeName + } + const claimName = getVolumeClaimName() appPod.spec.volumes = [ { - name: EXTERNALS_VOLUME_NAME, - emptyDir: {} - }, - { - name: GITHUB_VOLUME_NAME, - emptyDir: {} - }, - { - name: WORK_VOLUME, - emptyDir: {} + name: POD_VOLUME_NAME, + persistentVolumeClaim: { claimName } } ] @@ -182,62 +134,90 @@ export async function createJobPod( }) } -export async function createContainerStepPod( - name: string, +export async function createJob( container: k8s.V1Container, extension?: k8s.V1PodTemplateSpec -): Promise { - const appPod = new k8s.V1Pod() - - appPod.apiVersion = 'v1' - appPod.kind = 'Pod' - - appPod.metadata = new k8s.V1ObjectMeta() - appPod.metadata.name = name - - const instanceLabel = new RunnerInstanceLabel() - appPod.metadata.labels = { - [instanceLabel.key]: instanceLabel.value - } - appPod.metadata.annotations = {} - - appPod.spec = new k8s.V1PodSpec() - appPod.spec.containers = [container] - - appPod.spec.restartPolicy = 'Never' +): Promise { + const runnerInstanceLabel = new RunnerInstanceLabel() - appPod.spec.volumes = [ + const job = new k8s.V1Job() + job.apiVersion = 'batch/v1' + job.kind = 'Job' + job.metadata = new k8s.V1ObjectMeta() + job.metadata.name = getStepPodName() + job.metadata.labels = { [runnerInstanceLabel.key]: runnerInstanceLabel.value } + job.metadata.annotations = {} + + job.spec = new k8s.V1JobSpec() + job.spec.ttlSecondsAfterFinished = 300 + job.spec.backoffLimit = 0 + job.spec.template = new k8s.V1PodTemplateSpec() + + job.spec.template.spec = new k8s.V1PodSpec() + job.spec.template.metadata = new k8s.V1ObjectMeta() + job.spec.template.metadata.labels = {} + job.spec.template.metadata.annotations = {} + job.spec.template.spec.containers = [container] + job.spec.template.spec.restartPolicy = 'Never' + + const nodeName = await getCurrentNodeName() + if (useKubeScheduler()) { + job.spec.template.spec.affinity = await getPodAffinity(nodeName) + } else { + job.spec.template.spec.nodeName = nodeName + } + + const claimName = getVolumeClaimName() + job.spec.template.spec.volumes = [ { - name: EXTERNALS_VOLUME_NAME, - emptyDir: {} - }, - { - name: GITHUB_VOLUME_NAME, - emptyDir: {} - }, - { - name: WORK_VOLUME, - emptyDir: {} + name: POD_VOLUME_NAME, + persistentVolumeClaim: { claimName } } ] - if (extension?.metadata) { - mergeObjectMeta(appPod, extension.metadata) - } - - if (extension?.spec) { - mergePodSpecWithOptions(appPod.spec, extension.spec) + if (extension) { + if (extension.metadata) { + mergeObjectMeta(job, extension.metadata) + mergeObjectMeta(job.spec.template, extension.metadata) + } + if (extension.spec) { + mergePodSpecWithOptions(job.spec.template.spec, extension.spec) + } } - return await k8sApi.createNamespacedPod({ + return await k8sBatchV1Api.createNamespacedJob({ namespace: namespace(), - body: appPod + body: job }) } -export async function deletePod(name: string): Promise { +export async function getContainerJobPodName(jobName: string): Promise { + const selector = `job-name=${jobName}` + const backOffManager = new BackOffManager(60) + while (true) { + const podList = await k8sApi.listNamespacedPod({ + namespace: namespace(), + labelSelector: selector, + limit: 1 + }) + + if (!podList.items?.length) { + await backOffManager.backOff() + continue + } + + if (!podList.items[0].metadata?.name) { + throw new Error( + `Failed to determine the name of the pod for job ${jobName}` + ) + } + return podList.items[0].metadata.name + } +} + +export async function deletePod(podName: string): Promise { await k8sApi.deleteNamespacedPod({ - name, + name: podName, namespace: namespace(), gracePeriodSeconds: 0 }) @@ -264,7 +244,6 @@ export async function execPodStep( stdin ?? null, false /* tty */, resp => { - core.debug(`execPodStep response: ${JSON.stringify(resp)}`) if (resp.status === 'Success') { resolve(resp.code || 0) } else { @@ -282,290 +261,6 @@ export async function execPodStep( }) } -export async function execCalculateOutputHashSorted( - podName: string, - containerName: string, - command: string[] -): Promise { - const exec = new k8s.Exec(kc) - - let output = '' - const outputWriter = new stream.Writable({ - write(chunk, _enc, cb) { - try { - output += chunk.toString('utf8') - cb() - } catch (e) { - cb(e as Error) - } - } - }) - - await new Promise((resolve, reject) => { - exec - .exec( - namespace(), - podName, - containerName, - command, - outputWriter, // capture stdout - process.stderr, - null, - false /* tty */, - resp => { - core.debug(`internalExecOutput response: ${JSON.stringify(resp)}`) - if (resp.status === 'Success') { - resolve() - } else { - core.debug( - JSON.stringify({ - message: resp?.message, - details: resp?.details - }) - ) - reject(new Error(resp?.message || 'internalExecOutput failed')) - } - } - ) - .catch(e => reject(e)) - }) - - outputWriter.end() - - // Sort lines for consistent ordering across platforms - const sortedOutput = - output - .split('\n') - .filter(line => line.length > 0) - .sort() - .join('\n') + '\n' - - const hash = createHash('sha256') - hash.update(sortedOutput) - return hash.digest('hex') -} - -export async function localCalculateOutputHashSorted( - commands: string[] -): Promise { - return await new Promise((resolve, reject) => { - const child = spawn(commands[0], commands.slice(1), { - stdio: ['ignore', 'pipe', 'ignore'] - }) - - let output = '' - child.stdout.on('data', chunk => { - output += chunk.toString('utf8') - }) - child.on('error', reject) - child.on('close', (code: number) => { - if (code === 0) { - // Sort lines for consistent ordering across distributions/platforms - const sortedOutput = - output - .split('\n') - .filter(line => line.length > 0) - .sort() - .join('\n') + '\n' - - const hash = createHash('sha256') - hash.update(sortedOutput) - resolve(hash.digest('hex')) - } else { - reject(new Error(`child process exited with code ${code}`)) - } - }) - }) -} - -export async function execCpToPod( - podName: string, - runnerPath: string, - containerPath: string -): Promise { - core.debug(`Copying ${runnerPath} to pod ${podName} at ${containerPath}`) - - let attempt = 0 - while (true) { - try { - const exec = new k8s.Exec(kc) - // Use tar to extract with --no-same-owner to avoid ownership issues. - // Then use find to fix permissions. The -m flag helps but we also need to fix permissions after. - const command = [ - 'sh', - '-c', - `tar xf - --no-same-owner -C ${shlex.quote(containerPath)} 2>/dev/null; ` + - `find ${shlex.quote(containerPath)} -type f -exec chmod u+rw {} \\; 2>/dev/null; ` + - `find ${shlex.quote(containerPath)} -type d -exec chmod u+rwx {} \\; 2>/dev/null` - ] - const readStream = tar.pack(runnerPath) - const errStream = new WritableStreamBuffer() - await new Promise((resolve, reject) => { - exec - .exec( - namespace(), - podName, - JOB_CONTAINER_NAME, - command, - null, - errStream, - readStream, - false, - async status => { - if (errStream.size()) { - reject( - new Error( - `Error from execCpToPod - status: ${status.status}, details: \n ${errStream.getContentsAsString()}` - ) - ) - } - resolve(status) - } - ) - .catch(e => reject(e)) - }) - break - } catch (error) { - core.debug(`cpToPod: Attempt ${attempt + 1} failed: ${error}`) - attempt++ - if (attempt >= 30) { - throw new Error( - `cpToPod failed after ${attempt} attempts: ${JSON.stringify(error)}` - ) - } - await sleep(1000) - } - } - - let attempts = 15 - const delay = 1000 - for (let i = 0; i < attempts; i++) { - try { - const want = await localCalculateOutputHashSorted([ - 'sh', - '-c', - listDirAllCommand(runnerPath) - ]) - - const got = await execCalculateOutputHashSorted( - podName, - JOB_CONTAINER_NAME, - ['sh', '-c', listDirAllCommand(containerPath)] - ) - - if (got !== want) { - core.debug( - `The hash of the directory does not match the expected value; want='${want}' got='${got}'` - ) - await sleep(delay) - continue - } - - break - } catch (error) { - core.debug(`Attempt ${i + 1} failed: ${error}`) - await sleep(delay) - } - } -} - -export async function execCpFromPod( - podName: string, - containerPath: string, - parentRunnerPath: string -): Promise { - const targetRunnerPath = `${parentRunnerPath}/${path.basename(containerPath)}` - core.debug( - `Copying from pod ${podName} ${containerPath} to ${targetRunnerPath}` - ) - - let attempt = 0 - while (true) { - try { - // make temporary directory - const exec = new k8s.Exec(kc) - const containerPaths = containerPath.split('/') - const dirname = containerPaths.pop() as string - const command = [ - 'tar', - 'cf', - '-', - '-C', - containerPaths.join('/') || '/', - dirname - ] - const writerStream = tar.extract(parentRunnerPath) - const errStream = new WritableStreamBuffer() - - await new Promise((resolve, reject) => { - exec - .exec( - namespace(), - podName, - JOB_CONTAINER_NAME, - command, - writerStream, - errStream, - null, - false, - async status => { - if (errStream.size()) { - reject( - new Error( - `Error from cpFromPod - details: \n ${errStream.getContentsAsString()}` - ) - ) - } - resolve(status) - } - ) - .catch(e => reject(e)) - }) - break - } catch (error) { - core.debug(`Attempt ${attempt + 1} failed: ${error}`) - attempt++ - if (attempt >= 30) { - throw new Error( - `execCpFromPod failed after ${attempt} attempts: ${JSON.stringify(error)}` - ) - } - await sleep(1000) - } - } - - let attempts = 15 - const delay = 1000 - for (let i = 0; i < attempts; i++) { - try { - const want = await execCalculateOutputHashSorted( - podName, - JOB_CONTAINER_NAME, - ['sh', '-c', listDirAllCommand(containerPath)] - ) - - const got = await localCalculateOutputHashSorted([ - 'sh', - '-c', - listDirAllCommand(targetRunnerPath) - ]) - - if (got !== want) { - core.debug( - `The hash of the directory does not match the expected value; want='${want}' got='${got}'` - ) - await sleep(delay) - continue - } - - break - } catch (error) { - core.debug(`Attempt ${i + 1} failed: ${error}`) - await sleep(delay) - } - } -} - export async function waitForJobToComplete(jobName: string): Promise { const backOffManager = new BackOffManager() while (true) { @@ -649,9 +344,9 @@ export async function createSecretForEnvs(envs: { return secretName } -export async function deleteSecret(name: string): Promise { +export async function deleteSecret(secretName: string): Promise { await k8sApi.deleteNamespacedSecret({ - name, + name: secretName, namespace: namespace() }) } @@ -667,8 +362,7 @@ export async function pruneSecrets(): Promise { await Promise.all( secretList.items.map( - async secret => - secret.metadata?.name && (await deleteSecret(secret.metadata.name)) + secret => secret.metadata?.name && deleteSecret(secret.metadata.name) ) ) } @@ -784,9 +478,7 @@ export async function prunePods(): Promise { } await Promise.all( - podList.items.map( - async pod => pod.metadata?.name && (await deletePod(pod.metadata.name)) - ) + podList.items.map(pod => pod.metadata?.name && deletePod(pod.metadata.name)) ) } @@ -831,12 +523,12 @@ export async function isPodContainerAlpine( [ 'sh', '-c', - `[ $(cat /etc/*release* | grep -i -e "^ID=*alpine*" -c) != 0 ] || exit 1` + `'[ $(cat /etc/*release* | grep -i -e "^ID=*alpine*" -c) != 0 ] || exit 1'` ], podName, containerName ) - } catch { + } catch (err) { isAlpine = false } @@ -857,6 +549,39 @@ export function namespace(): string { return context.namespace } +async function getCurrentNodeName(): Promise { + const resp = await k8sApi.readNamespacedPod({ + name: getRunnerPodName(), + namespace: namespace() + }) + + const nodeName = resp.spec?.nodeName + if (!nodeName) { + throw new Error('Failed to determine node name') + } + return nodeName +} + +async function getPodAffinity(nodeName: string): Promise { + const affinity = new k8s.V1Affinity() + affinity.nodeAffinity = new k8s.V1NodeAffinity() + affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution = + new k8s.V1NodeSelector() + affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms = + [ + { + matchExpressions: [ + { + key: 'kubernetes.io/hostname', + operator: 'In', + values: [nodeName] + } + ] + } + ] + return affinity +} + class BackOffManager { private backOffSeconds = 1 totalTime = 0 diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index 9e744004..890f6785 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -6,6 +6,8 @@ import { v1 as uuidv4 } from 'uuid' import { CONTAINER_EXTENSION_PREFIX } from '../hooks/constants' import * as shlex from 'shlex' import { Mount } from 'hooklib' +import * as path from 'path' +import { POD_VOLUME_NAME } from './index' export const DEFAULT_CONTAINER_ENTRY_POINT_ARGS = [`-f`, `/dev/null`] export const DEFAULT_CONTAINER_ENTRY_POINT = 'tail' @@ -13,24 +15,98 @@ export const DEFAULT_CONTAINER_ENTRY_POINT = 'tail' export const ENV_HOOK_TEMPLATE_PATH = 'ACTIONS_RUNNER_CONTAINER_HOOK_TEMPLATE' export const ENV_USE_KUBE_SCHEDULER = 'ACTIONS_RUNNER_USE_KUBE_SCHEDULER' -export const EXTERNALS_VOLUME_NAME = 'externals' -export const GITHUB_VOLUME_NAME = 'github' -export const WORK_VOLUME = 'work' - -export const CONTAINER_VOLUMES: k8s.V1VolumeMount[] = [ - { - name: EXTERNALS_VOLUME_NAME, - mountPath: '/__e' - }, - { - name: WORK_VOLUME, - mountPath: '/__w' - }, - { - name: GITHUB_VOLUME_NAME, - mountPath: '/github' +export function containerVolumes( + userMountVolumes: Mount[] = [], + jobContainer = true, + containerAction = false +): k8s.V1VolumeMount[] { + const mounts: k8s.V1VolumeMount[] = [ + { + name: POD_VOLUME_NAME, + mountPath: '/__w' + } + ] + + const workspacePath = process.env.GITHUB_WORKSPACE as string + if (containerAction) { + const i = workspacePath.lastIndexOf('_work/') + const workspaceRelativePath = workspacePath.slice(i + '_work/'.length) + mounts.push( + { + name: POD_VOLUME_NAME, + mountPath: '/github/workspace', + subPath: workspaceRelativePath + }, + { + name: POD_VOLUME_NAME, + mountPath: '/github/file_commands', + subPath: '_temp/_runner_file_commands' + }, + { + name: POD_VOLUME_NAME, + mountPath: '/github/home', + subPath: '_temp/_github_home' + }, + { + name: POD_VOLUME_NAME, + mountPath: '/github/workflow', + subPath: '_temp/_github_workflow' + } + ) + return mounts + } + + if (!jobContainer) { + return mounts + } + + mounts.push( + { + name: POD_VOLUME_NAME, + mountPath: '/__e', + subPath: 'externals' + }, + { + name: POD_VOLUME_NAME, + mountPath: '/github/home', + subPath: '_temp/_github_home' + }, + { + name: POD_VOLUME_NAME, + mountPath: '/github/workflow', + subPath: '_temp/_github_workflow' + } + ) + + if (!userMountVolumes?.length) { + return mounts } -] + + for (const userVolume of userMountVolumes) { + let sourceVolumePath = '' + if (path.isAbsolute(userVolume.sourceVolumePath)) { + if (!userVolume.sourceVolumePath.startsWith(workspacePath)) { + throw new Error( + 'Volume mounts outside of the work folder are not supported' + ) + } + sourceVolumePath = userVolume.sourceVolumePath.slice( + workspacePath.length + 1 + ) + } else { + sourceVolumePath = userVolume.sourceVolumePath + } + + mounts.push({ + name: POD_VOLUME_NAME, + mountPath: userVolume.targetVolumePath, + subPath: sourceVolumePath, + readOnly: userVolume.readOnly + }) + } + + return mounts +} export function prepareJobScript(userVolumeMounts: Mount[]): { containerPath: string @@ -54,6 +130,38 @@ mkdir -p ${mountDirs} } } +export function writeEntryPointScript( + workingDirectory: string, + entryPoint: string, + entryPointArgs?: string[], + prependPath?: string[], + environmentVariables?: { [key: string]: string } +): { containerPath: string; runnerPath: string } { + let exportPath = '' + if (prependPath?.length) { + const prepend = + typeof prependPath === 'string' ? prependPath : prependPath.join(':') + exportPath = `export PATH=${prepend}:$PATH` + } + + const environmentPrefix = scriptEnv(environmentVariables) + + const content = `#!/bin/sh -l +${exportPath} +cd ${workingDirectory} && \\ +exec ${environmentPrefix} ${entryPoint} ${ + entryPointArgs?.length ? entryPointArgs.join(' ') : '' + } +` + const filename = `${uuidv4()}.sh` + const entryPointPath = `${process.env.RUNNER_TEMP}/${filename}` + fs.writeFileSync(entryPointPath, content) + return { + containerPath: `/__w/_temp/${filename}`, + runnerPath: entryPointPath + } +} + export function writeRunScript( workingDirectory: string, entryPoint: string, @@ -288,18 +396,5 @@ function mergeLists(base?: T[], from?: T[]): T[] { } export function fixArgs(args: string[]): string[] { - // Preserve shell command strings passed via `sh -c` without re-tokenizing. - // Retokenizing would split the script into multiple args, breaking `sh -c`. - if (args.length >= 2 && args[0] === 'sh' && args[1] === '-c') { - return args - } return shlex.split(args.join(' ')) } - -export async function sleep(ms: number): Promise { - return new Promise(resolve => setTimeout(resolve, ms)) -} - -export function listDirAllCommand(dir: string): string { - return `cd ${shlex.quote(dir)} && find . -type f -not -path '*/_runner_hook_responses*' -exec stat -c '%s %n' {} \\;` -} diff --git a/packages/k8s/tests/rwo-affinity-test.ts b/packages/k8s/tests/rwo-affinity-test.ts new file mode 100644 index 00000000..93d9ed63 --- /dev/null +++ b/packages/k8s/tests/rwo-affinity-test.ts @@ -0,0 +1,143 @@ +import * as fs from 'fs' +import { cleanupJob } from '../src/hooks' +import { prepareJob } from '../src/hooks/prepare-job' +import { TestHelper } from './test-setup' +import { getPodByName } from '../src/k8s' +import { ENV_USE_KUBE_SCHEDULER } from '../src/k8s/utils' + +jest.useRealTimers() + +let testHelper: TestHelper +let prepareJobData: any +let prepareJobOutputFilePath: string + +describe('RWO Affinity Behavior (Scheduler Mode)', () => { + beforeEach(async () => { + testHelper = new TestHelper() + await testHelper.initialize() + prepareJobData = testHelper.getPrepareJobDefinition() + prepareJobOutputFilePath = testHelper.createFile('prepare-job-output.json') + }) + + afterEach(async () => { + await cleanupJob() + await testHelper.cleanup() + delete process.env[ENV_USE_KUBE_SCHEDULER] + }) + + it('should add nodeAffinity with hostname selector when scheduler mode is enabled', async () => { + process.env[ENV_USE_KUBE_SCHEDULER] = 'true' + + await prepareJob(prepareJobData.args, prepareJobOutputFilePath) + + const content = JSON.parse( + fs.readFileSync(prepareJobOutputFilePath).toString() + ) + + const pod = await getPodByName(content.state.jobPod) + + expect(pod.spec?.affinity).toBeDefined() + expect(pod.spec?.affinity?.nodeAffinity).toBeDefined() + + const nodeAffinity = pod.spec?.affinity?.nodeAffinity + expect( + nodeAffinity?.requiredDuringSchedulingIgnoredDuringExecution + ).toBeDefined() + + const nodeSelectorTerms = + nodeAffinity?.requiredDuringSchedulingIgnoredDuringExecution + ?.nodeSelectorTerms + + expect(nodeSelectorTerms).toBeDefined() + expect(nodeSelectorTerms?.length).toBeGreaterThan(0) + + const matchExpressions = nodeSelectorTerms?.[0].matchExpressions + expect(matchExpressions).toBeDefined() + expect(matchExpressions?.length).toBeGreaterThan(0) + + const hostnameExpression = matchExpressions?.[0] + expect(hostnameExpression?.key).toBe('kubernetes.io/hostname') + expect(hostnameExpression?.operator).toBe('In') + + expect(hostnameExpression?.values).toBeDefined() + expect(hostnameExpression?.values?.length).toBeGreaterThan(0) + expect(hostnameExpression?.values?.[0]).toBeTruthy() + }) + + it('should NOT add nodeAffinity when scheduler mode is disabled', async () => { + process.env[ENV_USE_KUBE_SCHEDULER] = 'false' + + await prepareJob(prepareJobData.args, prepareJobOutputFilePath) + + const content = JSON.parse( + fs.readFileSync(prepareJobOutputFilePath).toString() + ) + + const pod = await getPodByName(content.state.jobPod) + + if (pod.spec?.affinity) { + expect(pod.spec.affinity.nodeAffinity).toBeUndefined() + } + + expect(pod.spec?.nodeName).toBeDefined() + }) + + it('should fail assertion if affinity block is missing when scheduler mode is enabled', async () => { + process.env[ENV_USE_KUBE_SCHEDULER] = 'true' + + await prepareJob(prepareJobData.args, prepareJobOutputFilePath) + + const content = JSON.parse( + fs.readFileSync(prepareJobOutputFilePath).toString() + ) + + const pod = await getPodByName(content.state.jobPod) + + expect(pod.spec?.affinity).toBeDefined() + expect(pod.spec?.affinity?.nodeAffinity).toBeDefined() + expect( + pod.spec?.affinity?.nodeAffinity + ?.requiredDuringSchedulingIgnoredDuringExecution + ).toBeDefined() + + const nodeSelectorTerms = + pod.spec?.affinity?.nodeAffinity + ?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms + + expect(nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.key).toBe( + 'kubernetes.io/hostname' + ) + expect(nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.operator).toBe('In') + expect( + nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.values?.length + ).toBeGreaterThan(0) + }) + + it('should use correct node name from runner pod in affinity values', async () => { + process.env[ENV_USE_KUBE_SCHEDULER] = 'true' + + const runnerPodName = process.env.ACTIONS_RUNNER_POD_NAME + + await prepareJob(prepareJobData.args, prepareJobOutputFilePath) + + const content = JSON.parse( + fs.readFileSync(prepareJobOutputFilePath).toString() + ) + + const jobPod = await getPodByName(content.state.jobPod) + + const runnerPod = await getPodByName(runnerPodName!) + + const affinityValues = + jobPod.spec?.affinity?.nodeAffinity + ?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms?.[0] + ?.matchExpressions?.[0]?.values + + expect(affinityValues).toBeDefined() + expect(affinityValues?.length).toBeGreaterThan(0) + + if (runnerPod.spec?.nodeName) { + expect(affinityValues).toContain(runnerPod.spec.nodeName) + } + }) +}) diff --git a/packages/k8s/tests/rwx-contract-demo-test.ts b/packages/k8s/tests/rwx-contract-demo-test.ts new file mode 100644 index 00000000..e3556b47 --- /dev/null +++ b/packages/k8s/tests/rwx-contract-demo-test.ts @@ -0,0 +1,28 @@ +import { + isRWXTestEnabled, + getRWXStorageClass, + RWX_SKIP_MESSAGE +} from './test-setup' + +describe('RWX Test Contract Demo', () => { + const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip + + describeOrSkip('RWX volume tests', () => { + it('should use RWX storage class when enabled', () => { + const storageClass = getRWXStorageClass() + expect(storageClass).toBeDefined() + expect(typeof storageClass).toBe('string') + }) + + it('should verify both env vars are required', () => { + expect(process.env.ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX).toBe('true') + expect( + process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS + ).toBeDefined() + }) + }) + + if (!isRWXTestEnabled()) { + it(RWX_SKIP_MESSAGE, () => {}) + } +}) diff --git a/packages/k8s/tests/rwx-volume-test.ts b/packages/k8s/tests/rwx-volume-test.ts new file mode 100644 index 00000000..22ff848e --- /dev/null +++ b/packages/k8s/tests/rwx-volume-test.ts @@ -0,0 +1,119 @@ +import * as k8s from '@kubernetes/client-node' +import * as fs from 'fs' +import { cleanupJob, prepareJob, runScriptStep } from '../src/hooks' +import { + TestHelper, + isRWXTestEnabled, + getRWXStorageClass, + RWX_SKIP_MESSAGE +} from './test-setup' +import { RunScriptStepArgs } from 'hooklib' + +jest.useRealTimers() + +const kc = new k8s.KubeConfig() +kc.loadFromDefault() +const k8sApi = kc.makeApiClient(k8s.CoreV1Api) + +describe('RWX Volume Tests', () => { + const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip + + describeOrSkip('RWX volume integration', () => { + let testHelper: TestHelper + let rwxPvcName: string + let prepareJobData: any + let prepareJobOutputFilePath: string + + beforeEach(async () => { + testHelper = new TestHelper() + await testHelper.initialize() + + const podName = process.env.ACTIONS_RUNNER_POD_NAME + rwxPvcName = `${podName}-work-rwx` + + const volumeClaim: k8s.V1PersistentVolumeClaim = { + metadata: { + name: rwxPvcName + }, + spec: { + accessModes: ['ReadWriteMany'], + volumeMode: 'Filesystem', + storageClassName: getRWXStorageClass(), + resources: { + requests: { + storage: '1Gi' + } + } + } + } + + await k8sApi.createNamespacedPersistentVolumeClaim({ + namespace: 'default', + body: volumeClaim + }) + + process.env.ACTIONS_RUNNER_CLAIM_NAME = rwxPvcName + + prepareJobData = testHelper.getPrepareJobDefinition() + prepareJobOutputFilePath = testHelper.createFile( + 'prepare-job-output.json' + ) + }) + + afterAll(async () => { + if (rwxPvcName) { + try { + await k8sApi.deleteNamespacedPersistentVolumeClaim({ + name: rwxPvcName, + namespace: 'default' + }) + } catch { + // Ignore cleanup errors - PVC may not exist + } + } + }) + + afterEach(async () => { + await testHelper.cleanup() + }) + + it('should successfully run hook flow with RWX volume', async () => { + await expect( + prepareJob(prepareJobData.args, prepareJobOutputFilePath) + ).resolves.not.toThrow() + + const prepareJobOutputJson = fs.readFileSync(prepareJobOutputFilePath) + const prepareJobOutputData = JSON.parse(prepareJobOutputJson.toString()) + + const scriptStepData = testHelper.getRunScriptStepDefinition() + + await expect( + runScriptStep( + scriptStepData.args as RunScriptStepArgs, + prepareJobOutputData.state + ) + ).resolves.not.toThrow() + + await expect(cleanupJob()).resolves.not.toThrow() + }) + + it('should verify RWX PVC was created with correct access mode', async () => { + const pvc = await k8sApi.readNamespacedPersistentVolumeClaim({ + name: rwxPvcName, + namespace: 'default' + }) + + expect(pvc.spec?.accessModes).toContain('ReadWriteMany') + expect(pvc.spec?.storageClassName).toBe(getRWXStorageClass()) + expect(pvc.spec?.volumeMode).toBe('Filesystem') + }) + + it('should verify RWX claim name is set correctly', () => { + expect(process.env.ACTIONS_RUNNER_CLAIM_NAME).toBe(rwxPvcName) + }) + }) + + if (!isRWXTestEnabled()) { + it(RWX_SKIP_MESSAGE, () => {}) + } +}) diff --git a/packages/k8s/tests/test-setup.ts b/packages/k8s/tests/test-setup.ts index 2c81ddb9..3ed6a604 100644 --- a/packages/k8s/tests/test-setup.ts +++ b/packages/k8s/tests/test-setup.ts @@ -9,6 +9,7 @@ const kc = new k8s.KubeConfig() kc.loadFromDefault() const k8sApi = kc.makeApiClient(k8s.CoreV1Api) +const k8sStorageApi = kc.makeApiClient(k8s.StorageV1Api) export class TestHelper { private tempDirPath: string @@ -46,6 +47,7 @@ export class TestHelper { await this.cleanupK8sResources() try { + await this.createTestVolume() await this.createTestJobPod() } catch (e) { console.log(e) @@ -62,6 +64,24 @@ export class TestHelper { } async cleanupK8sResources(): Promise { + await k8sApi + .deleteNamespacedPersistentVolumeClaim({ + name: `${this.podName}-work`, + namespace: 'default', + gracePeriodSeconds: 0 + }) + .catch((e: k8s.ApiException) => { + if (e.code !== 404) { + console.error(JSON.stringify(e)) + } + }) + await k8sApi + .deletePersistentVolume({ name: `${this.podName}-pv` }) + .catch((e: k8s.ApiException) => { + if (e.code !== 404) { + console.error(JSON.stringify(e)) + } + }) await k8sApi .deleteNamespacedPod({ name: this.podName, @@ -84,6 +104,14 @@ export class TestHelper { console.error(JSON.stringify(e)) } }) + + await k8sStorageApi + .deleteStorageClass({ name: `${this.podName}-storage` }) + .catch((e: k8s.ApiException) => { + if (e.code !== 404) { + console.error(JSON.stringify(e)) + } + }) } createFile(fileName?: string): string { const filePath = `${this.tempDirPath}/${fileName || uuidv4()}` @@ -120,6 +148,58 @@ export class TestHelper { await k8sApi.createNamespacedPod({ namespace: 'default', body: pod }) } + async createTestVolume(): Promise { + const storageClassName = `${this.podName}-storage` + + const sc: k8s.V1StorageClass = { + metadata: { + name: storageClassName + }, + provisioner: 'kubernetes.io/no-provisioner', + volumeBindingMode: 'Immediate' + } + await k8sStorageApi.createStorageClass({ body: sc }) + + const volume: k8s.V1PersistentVolume = { + metadata: { + name: `${this.podName}-pv` + }, + spec: { + storageClassName, + capacity: { + storage: '2Gi' + }, + volumeMode: 'Filesystem', + accessModes: ['ReadWriteOnce'], + hostPath: { + path: `${this.tempDirPath}/_work` + } + } + } + await k8sApi.createPersistentVolume({ body: volume }) + + const volumeClaim: k8s.V1PersistentVolumeClaim = { + metadata: { + name: `${this.podName}-work` + }, + spec: { + accessModes: ['ReadWriteOnce'], + volumeMode: 'Filesystem', + storageClassName, + volumeName: `${this.podName}-pv`, + resources: { + requests: { + storage: '1Gi' + } + } + } + } + await k8sApi.createNamespacedPersistentVolumeClaim({ + namespace: 'default', + body: volumeClaim + }) + } + getPrepareJobDefinition(): HookData { const prepareJob = JSON.parse( fs.readFileSync( @@ -163,3 +243,65 @@ export class TestHelper { return runContainerStep } } + +/** + * RWX Test Contract: + * + * Tests requiring ReadWriteMany (RWX) volumes MUST be gated by TWO environment variables: + * 1. ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true (explicit opt-in) + * 2. ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS= (storage class that supports RWX) + * + * If either variable is missing or ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX is not "true", + * the test MUST be skipped with the exact message defined in this contract. + * + * This contract ensures: + * - RWX tests do not fail on clusters without RWX provisioners + * - Test requirements are explicit and documented + * - RWO affinity tests remain independent and always runnable + * - Skip behavior is deterministic (no dynamic cluster probing) + * + * Usage example: + * ```typescript + * import { isRWXTestEnabled, getRWXStorageClass, RWX_SKIP_MESSAGE } from './test-setup' + * + * describe('RWX Test Suite', () => { + * const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip + * + * describeOrSkip('RWX volume tests', () => { + * it('should test RWX functionality', async () => { + * const storageClass = getRWXStorageClass() + * // ... test code using storageClass + * }) + * }) + * + * if (!isRWXTestEnabled()) { + * it(RWX_SKIP_MESSAGE, () => {}) + * } + * }) + * ``` + */ + +/** + * Checks if RWX tests should run based on environment variables. + * @returns true if both ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true and ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS are set + */ +export function isRWXTestEnabled(): boolean { + const enabled = process.env.ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX === 'true' + const storageClass = process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS + return enabled && !!storageClass +} + +/** + * Gets the RWX storage class name from environment variable. + * @returns The storage class name, or undefined if not set + */ +export function getRWXStorageClass(): string | undefined { + return process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS +} + +/** + * Skip message constant - DO NOT MODIFY + * This exact message must be used when skipping RWX tests + */ +export const RWX_SKIP_MESSAGE = + 'RWX tests skipped: set ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true and ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS' From 8c072d33b84aac08f4b251a7f153285ab9a77740 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Wed, 22 Apr 2026 22:59:40 +0200 Subject: [PATCH 03/11] lint fmt --- packages/k8s/src/hooks/prepare-job.ts | 2 +- packages/k8s/src/k8s/index.ts | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/k8s/src/hooks/prepare-job.ts b/packages/k8s/src/hooks/prepare-job.ts index cce677c3..3c95bc6f 100644 --- a/packages/k8s/src/hooks/prepare-job.ts +++ b/packages/k8s/src/hooks/prepare-job.ts @@ -228,7 +228,7 @@ export function createContainerSpec( container['environmentVariables'] || {} )) { if (value && key !== 'HOME') { - podContainer.env.push({ name: key, value: value as string }) + podContainer.env.push({ name: key, value: value }) } } diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 40ab0882..40a2e7ff 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -8,7 +8,6 @@ import { getSecretName, getStepPodName, getVolumeClaimName, - JOB_CONTAINER_NAME, RunnerInstanceLabel } from '../hooks/constants' import { @@ -362,7 +361,8 @@ export async function pruneSecrets(): Promise { await Promise.all( secretList.items.map( - secret => secret.metadata?.name && deleteSecret(secret.metadata.name) + async secret => + secret.metadata?.name && deleteSecret(secret.metadata.name) ) ) } @@ -478,7 +478,9 @@ export async function prunePods(): Promise { } await Promise.all( - podList.items.map(pod => pod.metadata?.name && deletePod(pod.metadata.name)) + podList.items.map( + async pod => pod.metadata?.name && deletePod(pod.metadata.name) + ) ) } @@ -528,7 +530,7 @@ export async function isPodContainerAlpine( podName, containerName ) - } catch (err) { + } catch { isAlpine = false } @@ -650,7 +652,7 @@ export function containerPorts( return ports } -export async function getPodByName(name): Promise { +export async function getPodByName(name: string): Promise { return await k8sApi.readNamespacedPod({ name, namespace: namespace() From 493e2bead2f99581199c85ba50e5806388e80d8e Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Thu, 23 Apr 2026 00:09:05 +0200 Subject: [PATCH 04/11] docs(adr,k8s): clarify RWX enables free scheduling, RWO requires affinity - RWX volumes allow job pods to be scheduled on any cluster node - RWO volumes require affinity to pin job pods to runner's node - Remove ACTIONS_RUNNER_USE_KUBE_SCHEDULER from RWX migration steps - Emphasize resource utilization benefits of RWX free scheduling Co-authored-by: Sisyphus --- docs/adrs/0135-rwx-volume-strategy.md | 12 ++++++------ packages/k8s/README.md | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/adrs/0135-rwx-volume-strategy.md b/docs/adrs/0135-rwx-volume-strategy.md index 646e7092..8d35372e 100644 --- a/docs/adrs/0135-rwx-volume-strategy.md +++ b/docs/adrs/0135-rwx-volume-strategy.md @@ -8,13 +8,13 @@ The Kubernetes hook implementation for GitHub Actions runners requires access to the runner's working directory (`_work`) within the dynamically created job pods. This shared access is typically managed via Persistent Volume Claims (PVCs). -Regardless of the storage strategy, job pods are always constrained to run on the same node as the runner pod to ensure consistent access to the local environment and state. The choice of volume access mode determines operational flexibility and multi-pod access capability rather than pod placement. +The choice of storage strategy significantly impacts pod scheduling. While ReadWriteOnce (RWO) volumes require job pods to be co-located on the same node as the runner pod, ReadWriteMany (RWX) volumes allow job pods to be scheduled freely across the cluster. -Depending on the storage provider and cluster configuration, operators may choose between `ReadWriteMany` (RWX) or `ReadWriteOnce` (RWO) access modes. RWX is preferred because it allows multiple pods to access the volume simultaneously, providing greater operational flexibility for future scaling or monitoring scenarios. RWO restricts volume access to a single pod at a time, locking the volume to that pod's specific node. +Depending on the storage provider and cluster configuration, operators may choose between `ReadWriteMany` (RWX) or `ReadWriteOnce` (RWO) access modes. RWX is preferred because it allows the Kubernetes scheduler to place job pods on any available node, improving resource utilization and cluster flexibility. RWO restricts volume access to a single node at a time, requiring all pods using the volume to be pinned to that specific node. ## Decision -We have decided to establish `ReadWriteMany` (RWX) as the preferred storage strategy for the Kubernetes hook. While job pods remain pinned to the runner's node, RWX provides superior operational flexibility by allowing multiple pods (such as sidecars or auxiliary tools) to access the same volume without storage-imposed locking constraints. +We have decided to establish `ReadWriteMany` (RWX) as the preferred storage strategy for the Kubernetes hook. RWX provides superior operational flexibility by enabling free scheduling of job pods across the cluster, as the shared volume is accessible from any node. This decoupling of job pods from the runner's node allows for better resource distribution and reduces the risk of node-level resource exhaustion. For environments where RWX is unavailable or undesirable, we support a `ReadWriteOnce` (RWO) fallback strategy. This fallback is implemented using node affinity to ensure that job pods are scheduled onto the same node as the runner pod that holds the RWO volume. @@ -39,9 +39,9 @@ We explicitly do **not** recommend the use of `spec.nodeName` for operator-drive ## Consequences -- **Flexibility:** RWX users benefit from the ability to have multiple pods access the volume simultaneously, simplifying future operational extensions. -- **Node Coupling:** All users are coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node to maintain workspace integrity. -- **Configuration:** Operators must be aware of the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` toggle when moving from RWX to RWO. This toggle controls whether the hook uses `nodeName` (bypassing the scheduler) or node affinity (using the scheduler) to pin the pod to the runner's node. +- **Flexibility:** RWX users benefit from the ability to schedule job pods on any node in the cluster, maximizing resource utilization. +- **Node Coupling:** RWO users remain coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node via affinity to maintain workspace integrity. +- **Configuration:** Operators must be aware of the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` toggle when using RWO. This toggle controls whether the hook uses `nodeName` (bypassing the scheduler) or node affinity (using the scheduler) to pin the pod to the runner's node. RWX configurations do not require this toggle for basic operation. ## Migration Guidance diff --git a/packages/k8s/README.md b/packages/k8s/README.md index 23d92c79..030fb950 100644 --- a/packages/k8s/README.md +++ b/packages/k8s/README.md @@ -36,12 +36,12 @@ rules: The K8s hooks require a shared volume between the runner pod and the job pods to share the workspace and other internal directories. ### RWX (Recommended) -The preferred way to configure storage is using a `ReadWriteMany` (RWX) Persistent Volume Claim. While job pods are always pinned to the runner's node, RWX provides better operational flexibility by allowing multiple pods to access the same workspace simultaneously. +The preferred way to configure storage is using a `ReadWriteMany` (RWX) Persistent Volume Claim. RWX allows the Kubernetes scheduler to place job pods on any node in the cluster, maximizing resource availability and flexibility. To migrate from RWO to RWX: 1. Provision a new `ReadWriteMany` StorageClass if one is not available. 2. Update your PVC definition to use `accessModes: [ReadWriteMany]`. -3. Set `ACTIONS_RUNNER_USE_KUBE_SCHEDULER=true` to enable the scheduler-based node pinning (via affinity) instead of the default `nodeName` pinning. +3. Remove the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable, as affinity is no longer required for pod placement. ### RWO Fallback (Affinity-based) If `ReadWriteMany` storage is not available, you can use `ReadWriteOnce` (RWO) storage. In this mode, all job pods must be scheduled on the same node as the runner pod that owns the PVC. From 66527bac170eb5a7b21f59031343cbb21dfbe1bf Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Thu, 23 Apr 2026 00:23:47 +0200 Subject: [PATCH 05/11] refactor(k8s): invert scheduler env var to make affinity default Change ACTIONS_RUNNER_USE_KUBE_SCHEDULER to ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER to make affinity-based scheduling the first-class (default) implementation. Breaking Change: - OLD: Set ACTIONS_RUNNER_USE_KUBE_SCHEDULER=true to enable affinity (opt-in) - NEW: Affinity is enabled by default, set ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true to disable (opt-out) Code changes: - utils.ts: Rename constant and invert useKubeScheduler() logic - rwo-affinity-test.ts: Update tests to verify default affinity behavior - ADR 0135: Update to reflect opt-out model - README: Update guidance to reflect default behavior Co-authored-by: Sisyphus --- docs/adrs/0135-rwx-volume-strategy.md | 16 ++++++++-------- packages/k8s/README.md | 6 +++--- packages/k8s/src/k8s/utils.ts | 5 +++-- packages/k8s/tests/rwo-affinity-test.ts | 18 ++++++------------ 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/docs/adrs/0135-rwx-volume-strategy.md b/docs/adrs/0135-rwx-volume-strategy.md index 8d35372e..9755fc4e 100644 --- a/docs/adrs/0135-rwx-volume-strategy.md +++ b/docs/adrs/0135-rwx-volume-strategy.md @@ -21,16 +21,16 @@ For environments where RWX is unavailable or undesirable, we support a `ReadWrit ### Operational Guidance 1. **Preferred Model (RWX):** Operators should configure the runner with a PVC supporting `ReadWriteMany`. -2. **Fallback Model (RWO):** If using `ReadWriteOnce`, operators must enable the Kubernetes scheduler integration by setting `ACTIONS_RUNNER_USE_KUBE_SCHEDULER=true`. -3. **Node Selection:** When scheduler integration is enabled, the hook applies a `requiredDuringSchedulingIgnoredDuringExecution` node affinity targeting the runner's current node (`kubernetes.io/hostname`). +2. **Fallback Model (RWO):** If using `ReadWriteOnce`, the Kubernetes scheduler integration is enabled by default. Operators can optionally disable it by setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` (not recommended). +3. **Node Selection:** By default, the hook applies a `requiredDuringSchedulingIgnoredDuringExecution` node affinity targeting the runner's current node (`kubernetes.io/hostname`). 4. **Implementation Details:** - The hook determines the node name via `getCurrentNodeName()` and applies affinity in `packages/k8s/src/k8s/index.ts` (lines 101, 165). - - The scheduler behavior is toggled by the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable, as defined in `packages/k8s/src/k8s/utils.ts` (line 16). + - The scheduler is enabled by default. Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` disables it, as defined in `packages/k8s/src/k8s/utils.ts` (line 16). - The PVC claim name defaults to `${ACTIONS_RUNNER_POD_NAME}-work` unless overridden by `ACTIONS_RUNNER_CLAIM_NAME` (`packages/k8s/src/hooks/constants.ts`, lines 27-33). ### Non-Recommendations -We explicitly do **not** recommend the use of `spec.nodeName` for operator-driven scheduling. While the hook uses `nodeName` as a legacy fallback when `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` is not set to `true` (`packages/k8s/src/k8s/index.ts`, lines 103, 167), this bypasses the Kubernetes scheduler and can lead to scheduling failures or resource imbalances. Operators should always prefer the affinity-based approach for RWO volumes. +We explicitly do **not** recommend the use of `spec.nodeName` for operator-driven scheduling. While the hook uses `nodeName` as a legacy fallback when `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER` is set to `true` (`packages/k8s/src/k8s/index.ts`, lines 103, 167), this bypasses the Kubernetes scheduler and can lead to scheduling failures or resource imbalances. Operators should prefer the default affinity-based approach for RWO volumes. ## Alternatives @@ -40,13 +40,13 @@ We explicitly do **not** recommend the use of `spec.nodeName` for operator-drive ## Consequences - **Flexibility:** RWX users benefit from the ability to schedule job pods on any node in the cluster, maximizing resource utilization. -- **Node Coupling:** RWO users remain coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node via affinity to maintain workspace integrity. -- **Configuration:** Operators must be aware of the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` toggle when using RWO. This toggle controls whether the hook uses `nodeName` (bypassing the scheduler) or node affinity (using the scheduler) to pin the pod to the runner's node. RWX configurations do not require this toggle for basic operation. +- **Node Coupling:** RWO users remain coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node via affinity (enabled by default) to maintain workspace integrity. +- **Configuration:** Operators using RWO can rely on the default affinity-based scheduling. Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` will fall back to legacy `nodeName` pinning (not recommended). RWX configurations do not require any special configuration for basic operation. ## Migration Guidance -Operators migrating from an RWO setup that relied on default `nodeName` behavior to a more robust affinity-based setup should: -1. Ensure the runner pod has the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable set to `true`. +Operators migrating from an RWO setup that relied on legacy `nodeName` behavior can continue by setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true`, but should migrate to the default affinity-based scheduling for better scheduler integration: +1. Remove the `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER` environment variable to use the default affinity-based scheduling. 2. Verify that the runner's ServiceAccount has the necessary permissions to list pods (to determine its own node). ## Non-Goals diff --git a/packages/k8s/README.md b/packages/k8s/README.md index 030fb950..86806823 100644 --- a/packages/k8s/README.md +++ b/packages/k8s/README.md @@ -30,7 +30,7 @@ rules: - The `ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER` env should be set to true to prevent the runner from running any jobs outside of a container - The runner pod should map a persistent volume claim into the `_work` directory - The `ACTIONS_RUNNER_CLAIM_NAME` env should be set to the persistent volume claim that contains the runner's working directory, otherwise it defaults to `${ACTIONS_RUNNER_POD_NAME}-work` -- The `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` env can be set to `true` to enable the Kubernetes scheduler for job pods. When set to `true`, the hook uses `nodeAffinity` to ensure job pods are scheduled correctly (essential for `ReadWriteOnce` volumes). If not set, the hook defaults to a legacy mode where job pods are pinned to the same node as the runner pod using `nodeName`. +- By default, the hook uses the Kubernetes scheduler with `nodeAffinity` to ensure job pods are scheduled correctly (essential for `ReadWriteOnce` volumes). Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` will fall back to legacy `nodeName` pinning (not recommended). ## Storage Guidance The K8s hooks require a shared volume between the runner pod and the job pods to share the workspace and other internal directories. @@ -41,13 +41,13 @@ The preferred way to configure storage is using a `ReadWriteMany` (RWX) Persiste To migrate from RWO to RWX: 1. Provision a new `ReadWriteMany` StorageClass if one is not available. 2. Update your PVC definition to use `accessModes: [ReadWriteMany]`. -3. Remove the `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` environment variable, as affinity is no longer required for pod placement. +3. No additional environment variables are needed - affinity-based scheduling is the default. ### RWO Fallback (Affinity-based) If `ReadWriteMany` storage is not available, you can use `ReadWriteOnce` (RWO) storage. In this mode, all job pods must be scheduled on the same node as the runner pod that owns the PVC. To enable this safely: -1. Ensure `ACTIONS_RUNNER_USE_KUBE_SCHEDULER` is set to `true`. +1. The default affinity-based scheduling works automatically. Do not set `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER`. 2. The hooks will automatically add a `nodeAffinity` to the job pods, ensuring they are scheduled on the same node as the runner pod (`kubernetes.io/hostname` match). > **Note:** We do not recommend manually setting `nodeName` in the pod template, as the hooks handle node placement automatically via affinity when the scheduler is enabled. diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index 890f6785..ce52699a 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -13,7 +13,8 @@ export const DEFAULT_CONTAINER_ENTRY_POINT_ARGS = [`-f`, `/dev/null`] export const DEFAULT_CONTAINER_ENTRY_POINT = 'tail' export const ENV_HOOK_TEMPLATE_PATH = 'ACTIONS_RUNNER_CONTAINER_HOOK_TEMPLATE' -export const ENV_USE_KUBE_SCHEDULER = 'ACTIONS_RUNNER_USE_KUBE_SCHEDULER' +export const ENV_DISABLE_KUBE_SCHEDULER = + 'ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER' export function containerVolumes( userMountVolumes: Mount[] = [], @@ -374,7 +375,7 @@ export function readExtensionFromFile(): k8s.V1PodTemplateSpec | undefined { } export function useKubeScheduler(): boolean { - return process.env[ENV_USE_KUBE_SCHEDULER] === 'true' + return process.env[ENV_DISABLE_KUBE_SCHEDULER] !== 'true' } export enum PodPhase { diff --git a/packages/k8s/tests/rwo-affinity-test.ts b/packages/k8s/tests/rwo-affinity-test.ts index 93d9ed63..62dd1f2b 100644 --- a/packages/k8s/tests/rwo-affinity-test.ts +++ b/packages/k8s/tests/rwo-affinity-test.ts @@ -3,7 +3,7 @@ import { cleanupJob } from '../src/hooks' import { prepareJob } from '../src/hooks/prepare-job' import { TestHelper } from './test-setup' import { getPodByName } from '../src/k8s' -import { ENV_USE_KUBE_SCHEDULER } from '../src/k8s/utils' +import { ENV_DISABLE_KUBE_SCHEDULER } from '../src/k8s/utils' jest.useRealTimers() @@ -22,12 +22,10 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { afterEach(async () => { await cleanupJob() await testHelper.cleanup() - delete process.env[ENV_USE_KUBE_SCHEDULER] + delete process.env[ENV_DISABLE_KUBE_SCHEDULER] }) - it('should add nodeAffinity with hostname selector when scheduler mode is enabled', async () => { - process.env[ENV_USE_KUBE_SCHEDULER] = 'true' - + it('should add nodeAffinity with hostname selector by default', async () => { await prepareJob(prepareJobData.args, prepareJobOutputFilePath) const content = JSON.parse( @@ -65,7 +63,7 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { }) it('should NOT add nodeAffinity when scheduler mode is disabled', async () => { - process.env[ENV_USE_KUBE_SCHEDULER] = 'false' + process.env[ENV_DISABLE_KUBE_SCHEDULER] = 'true' await prepareJob(prepareJobData.args, prepareJobOutputFilePath) @@ -82,9 +80,7 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { expect(pod.spec?.nodeName).toBeDefined() }) - it('should fail assertion if affinity block is missing when scheduler mode is enabled', async () => { - process.env[ENV_USE_KUBE_SCHEDULER] = 'true' - + it('should fail assertion if affinity block is missing by default', async () => { await prepareJob(prepareJobData.args, prepareJobOutputFilePath) const content = JSON.parse( @@ -113,9 +109,7 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { ).toBeGreaterThan(0) }) - it('should use correct node name from runner pod in affinity values', async () => { - process.env[ENV_USE_KUBE_SCHEDULER] = 'true' - + it('should use correct node name from runner pod in affinity values by default', async () => { const runnerPodName = process.env.ACTIONS_RUNNER_POD_NAME await prepareJob(prepareJobData.args, prepareJobOutputFilePath) From eaf3d5478f1afa739ec7947914169ea612dd01d5 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Wed, 29 Apr 2026 14:15:08 +0200 Subject: [PATCH 06/11] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/k8s/src/hooks/run-container-step.ts | 26 +++++++++----------- packages/k8s/src/hooks/run-script-step.ts | 2 +- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/packages/k8s/src/hooks/run-container-step.ts b/packages/k8s/src/hooks/run-container-step.ts index 07e0ac32..67f3c996 100644 --- a/packages/k8s/src/hooks/run-container-step.ts +++ b/packages/k8s/src/hooks/run-container-step.ts @@ -27,21 +27,19 @@ export async function runContainerStep( } let secretName: string | undefined = undefined - if (stepContainer.environmentVariables) { - try { - const envs = JSON.parse( - JSON.stringify(stepContainer.environmentVariables) - ) - envs['GITHUB_ACTIONS'] = 'true' - if (!('CI' in envs)) { - envs.CI = 'true' - } - secretName = await createSecretForEnvs(envs) - } catch (err) { - core.debug(`createSecretForEnvs failed: ${JSON.stringify(err)}`) - const message = (err as any)?.response?.body?.message || err - throw new Error(`failed to create script environment: ${message}`) + try { + const envs = JSON.parse( + JSON.stringify(stepContainer.environmentVariables ?? {}) + ) + envs['GITHUB_ACTIONS'] = 'true' + if (!('CI' in envs)) { + envs.CI = 'true' } + secretName = await createSecretForEnvs(envs) + } catch (err) { + core.debug(`createSecretForEnvs failed: ${JSON.stringify(err)}`) + const message = (err as any)?.response?.body?.message || err + throw new Error(`failed to create script environment: ${message}`) } const extension = readExtensionFromFile() diff --git a/packages/k8s/src/hooks/run-script-step.ts b/packages/k8s/src/hooks/run-script-step.ts index 7db0514d..08226188 100644 --- a/packages/k8s/src/hooks/run-script-step.ts +++ b/packages/k8s/src/hooks/run-script-step.ts @@ -33,6 +33,6 @@ export async function runScriptStep( const message = (err as any)?.response?.body?.message || err throw new Error(`failed to run script step: ${message}`) } finally { - fs.rmSync(runnerPath) + fs.rmSync(runnerPath, { force: true }) } } From 68897b641421aaf288ce4ab56781d86b3e8197fc Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Wed, 29 Apr 2026 19:34:40 +0200 Subject: [PATCH 07/11] wip --- package.json | 2 + packages/k8s/src/k8s/index.ts | 2 +- packages/k8s/src/k8s/utils.ts | 10 +- packages/k8s/tests/prepare-job-test.ts | 4 +- packages/k8s/tests/run-container-step-test.ts | 6 +- packages/k8s/tests/rwx-contract-demo-test.ts | 42 ++-- packages/k8s/tests/rwx-volume-test.ts | 187 +++++++++--------- packages/k8s/tests/test-setup.ts | 62 ------ 8 files changed, 137 insertions(+), 178 deletions(-) diff --git a/package.json b/package.json index ed388dea..31670166 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,8 @@ }, "scripts": { "test": "npm run test --prefix packages/docker && npm run test --prefix packages/k8s", + "test:docker": "npm run test --prefix packages/docker", + "test:k8s": "npm run test --prefix packages/k8s", "bootstrap": "npm ci --prefix packages/hooklib && npm ci --prefix packages/k8s && npm ci --prefix packages/docker", "format": "prettier --write '**/*.ts'", "format-check": "prettier --check '**/*.ts'", diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 40a2e7ff..ea1644ff 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -525,7 +525,7 @@ export async function isPodContainerAlpine( [ 'sh', '-c', - `'[ $(cat /etc/*release* | grep -i -e "^ID=*alpine*" -c) != 0 ] || exit 1'` + '[ $(cat /etc/*release* | grep -i -e "^ID=*alpine*" -c) != 0 ] || exit 1' ], podName, containerName diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index ce52699a..0129011c 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -91,9 +91,14 @@ export function containerVolumes( 'Volume mounts outside of the work folder are not supported' ) } - sourceVolumePath = userVolume.sourceVolumePath.slice( + const i = workspacePath.lastIndexOf('_work/') + const workspaceRelativePath = workspacePath.slice(i + '_work/'.length) + const sourceRelativePath = userVolume.sourceVolumePath.slice( workspacePath.length + 1 ) + sourceVolumePath = sourceRelativePath + ? path.posix.join(workspaceRelativePath, sourceRelativePath) + : workspaceRelativePath } else { sourceVolumePath = userVolume.sourceVolumePath } @@ -397,5 +402,8 @@ function mergeLists(base?: T[], from?: T[]): T[] { } export function fixArgs(args: string[]): string[] { + if (args.length >= 2 && args[0] === 'sh' && args[1] === '-c') { + return args + } return shlex.split(args.join(' ')) } diff --git a/packages/k8s/tests/prepare-job-test.ts b/packages/k8s/tests/prepare-job-test.ts index f73ee93b..0a25e36d 100644 --- a/packages/k8s/tests/prepare-job-test.ts +++ b/packages/k8s/tests/prepare-job-test.ts @@ -50,7 +50,7 @@ describe('Prepare job', () => { prepareJobData.args.container.userMountVolumes = [ { sourceVolumePath: userVolumeMount, - targetVolumePath: '/__w/myvolume', + targetVolumePath: '/myvolume', readOnly: false } ] @@ -63,7 +63,7 @@ describe('Prepare job', () => { ) await execPodStep( - ['sh', '-c', '[ "$(cat /__w/myvolume/file.txt)" = "hello" ] || exit 5'], + ['sh', '-c', '[ "$(cat /myvolume/file.txt)" = "hello" ] || exit 5'], content!.state!.jobPod, JOB_CONTAINER_NAME ).then(output => { diff --git a/packages/k8s/tests/run-container-step-test.ts b/packages/k8s/tests/run-container-step-test.ts index cc21f3df..9fde6b57 100644 --- a/packages/k8s/tests/run-container-step-test.ts +++ b/packages/k8s/tests/run-container-step-test.ts @@ -42,7 +42,7 @@ describe('Run container step', () => { { name: JOB_CONTAINER_EXTENSION_NAME, command: ['sh'], - args: ['-c', 'sleep 10000'] + args: ['-c', 'echo test'] }, { name: 'side-container', @@ -64,6 +64,10 @@ describe('Run container step', () => { delete process.env[ENV_HOOK_TEMPLATE_PATH] }) + afterEach(() => { + delete process.env[ENV_HOOK_TEMPLATE_PATH] + }) + it('should shold have env variables available', async () => { runContainerStepData.args.entryPoint = 'bash' runContainerStepData.args.entryPointArgs = [ diff --git a/packages/k8s/tests/rwx-contract-demo-test.ts b/packages/k8s/tests/rwx-contract-demo-test.ts index e3556b47..04e57c0b 100644 --- a/packages/k8s/tests/rwx-contract-demo-test.ts +++ b/packages/k8s/tests/rwx-contract-demo-test.ts @@ -1,28 +1,28 @@ -import { - isRWXTestEnabled, - getRWXStorageClass, - RWX_SKIP_MESSAGE -} from './test-setup' +import * as k8s from '@kubernetes/client-node' -describe('RWX Test Contract Demo', () => { - const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip +const kc = new k8s.KubeConfig() +kc.loadFromDefault() +const k8sApi = kc.makeApiClient(k8s.StorageV1Api) - describeOrSkip('RWX volume tests', () => { - it('should use RWX storage class when enabled', () => { - const storageClass = getRWXStorageClass() - expect(storageClass).toBeDefined() - expect(typeof storageClass).toBe('string') +describe('RWX Test Contract Demo', () => { + describe('RWX volume tests', () => { + it('should have at least one available storage class', async () => { + const list = await k8sApi.listStorageClass() + expect(list.items.length).toBeGreaterThan(0) }) - it('should verify both env vars are required', () => { - expect(process.env.ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX).toBe('true') - expect( - process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS - ).toBeDefined() + it('should have at least one default storage class in cluster', async () => { + const list = await k8sApi.listStorageClass() + const hasDefault = list.items.some(sc => { + const annotations = sc.metadata?.annotations || {} + return ( + annotations['storageclass.kubernetes.io/is-default-class'] === + 'true' || + annotations['storageclass.beta.kubernetes.io/is-default-class'] === + 'true' + ) + }) + expect(hasDefault).toBe(true) }) }) - - if (!isRWXTestEnabled()) { - it(RWX_SKIP_MESSAGE, () => {}) - } }) diff --git a/packages/k8s/tests/rwx-volume-test.ts b/packages/k8s/tests/rwx-volume-test.ts index 22ff848e..97c06ece 100644 --- a/packages/k8s/tests/rwx-volume-test.ts +++ b/packages/k8s/tests/rwx-volume-test.ts @@ -1,12 +1,8 @@ import * as k8s from '@kubernetes/client-node' import * as fs from 'fs' +import * as path from 'path' import { cleanupJob, prepareJob, runScriptStep } from '../src/hooks' -import { - TestHelper, - isRWXTestEnabled, - getRWXStorageClass, - RWX_SKIP_MESSAGE -} from './test-setup' +import { TestHelper } from './test-setup' import { RunScriptStepArgs } from 'hooklib' jest.useRealTimers() @@ -14,106 +10,117 @@ jest.useRealTimers() const kc = new k8s.KubeConfig() kc.loadFromDefault() const k8sApi = kc.makeApiClient(k8s.CoreV1Api) +const k8sStorageApi = kc.makeApiClient(k8s.StorageV1Api) describe('RWX Volume Tests', () => { - const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip - - describeOrSkip('RWX volume integration', () => { - let testHelper: TestHelper - let rwxPvcName: string - let prepareJobData: any - let prepareJobOutputFilePath: string - - beforeEach(async () => { - testHelper = new TestHelper() - await testHelper.initialize() - - const podName = process.env.ACTIONS_RUNNER_POD_NAME - rwxPvcName = `${podName}-work-rwx` - - const volumeClaim: k8s.V1PersistentVolumeClaim = { - metadata: { - name: rwxPvcName - }, - spec: { - accessModes: ['ReadWriteMany'], - volumeMode: 'Filesystem', - storageClassName: getRWXStorageClass(), - resources: { - requests: { - storage: '1Gi' - } - } - } + let testHelper: TestHelper + let rwxPvcName: string + let rwxPvName: string + let rwxStorageClassName: string + let prepareJobData: any + let prepareJobOutputFilePath: string + + beforeEach(async () => { + testHelper = new TestHelper() + await testHelper.initialize() + + const podName = process.env.ACTIONS_RUNNER_POD_NAME as string + const runnerWorkspace = process.env.RUNNER_WORKSPACE as string + const runnerWorkRoot = path.resolve(runnerWorkspace, '..') + + rwxPvcName = `${podName}-work-rwx` + rwxPvName = `${podName}-work-rwx-pv` + rwxStorageClassName = `${podName}-work-rwx-storage` + + const sc: k8s.V1StorageClass = { + metadata: { name: rwxStorageClassName }, + provisioner: 'kubernetes.io/no-provisioner', + volumeBindingMode: 'Immediate' + } + await k8sStorageApi.createStorageClass({ body: sc }) + + const pv: k8s.V1PersistentVolume = { + metadata: { name: rwxPvName }, + spec: { + storageClassName: rwxStorageClassName, + capacity: { storage: '2Gi' }, + volumeMode: 'Filesystem', + accessModes: ['ReadWriteMany'], + hostPath: { path: runnerWorkRoot } } + } + await k8sApi.createPersistentVolume({ body: pv }) + + const volumeClaim: k8s.V1PersistentVolumeClaim = { + metadata: { name: rwxPvcName }, + spec: { + accessModes: ['ReadWriteMany'], + volumeMode: 'Filesystem', + storageClassName: rwxStorageClassName, + volumeName: rwxPvName, + resources: { requests: { storage: '1Gi' } } + } + } - await k8sApi.createNamespacedPersistentVolumeClaim({ - namespace: 'default', - body: volumeClaim - }) - - process.env.ACTIONS_RUNNER_CLAIM_NAME = rwxPvcName - - prepareJobData = testHelper.getPrepareJobDefinition() - prepareJobOutputFilePath = testHelper.createFile( - 'prepare-job-output.json' - ) + await k8sApi.createNamespacedPersistentVolumeClaim({ + namespace: 'default', + body: volumeClaim }) - afterAll(async () => { - if (rwxPvcName) { - try { - await k8sApi.deleteNamespacedPersistentVolumeClaim({ - name: rwxPvcName, - namespace: 'default' - }) - } catch { - // Ignore cleanup errors - PVC may not exist - } - } - }) + process.env.ACTIONS_RUNNER_CLAIM_NAME = rwxPvcName - afterEach(async () => { - await testHelper.cleanup() - }) + prepareJobData = testHelper.getPrepareJobDefinition() + prepareJobOutputFilePath = testHelper.createFile('prepare-job-output.json') + }) - it('should successfully run hook flow with RWX volume', async () => { - await expect( - prepareJob(prepareJobData.args, prepareJobOutputFilePath) - ).resolves.not.toThrow() + afterEach(async () => { + await testHelper.cleanup() + delete process.env.ACTIONS_RUNNER_CLAIM_NAME + await k8sApi + .deleteNamespacedPersistentVolumeClaim({ + name: rwxPvcName, + namespace: 'default', + gracePeriodSeconds: 0 + }) + .catch(() => undefined) + await k8sApi.deletePersistentVolume({ name: rwxPvName }).catch(() => undefined) + await k8sStorageApi + .deleteStorageClass({ name: rwxStorageClassName }) + .catch(() => undefined) + }) - const prepareJobOutputJson = fs.readFileSync(prepareJobOutputFilePath) - const prepareJobOutputData = JSON.parse(prepareJobOutputJson.toString()) + it('should successfully run hook flow with RWX volume', async () => { + await expect( + prepareJob(prepareJobData.args, prepareJobOutputFilePath) + ).resolves.not.toThrow() - const scriptStepData = testHelper.getRunScriptStepDefinition() + const prepareJobOutputJson = fs.readFileSync(prepareJobOutputFilePath) + const prepareJobOutputData = JSON.parse(prepareJobOutputJson.toString()) - await expect( - runScriptStep( - scriptStepData.args as RunScriptStepArgs, - prepareJobOutputData.state - ) - ).resolves.not.toThrow() + const scriptStepData = testHelper.getRunScriptStepDefinition() - await expect(cleanupJob()).resolves.not.toThrow() - }) + await expect( + runScriptStep( + scriptStepData.args as RunScriptStepArgs, + prepareJobOutputData.state + ) + ).resolves.not.toThrow() - it('should verify RWX PVC was created with correct access mode', async () => { - const pvc = await k8sApi.readNamespacedPersistentVolumeClaim({ - name: rwxPvcName, - namespace: 'default' - }) + await expect(cleanupJob()).resolves.not.toThrow() + }) - expect(pvc.spec?.accessModes).toContain('ReadWriteMany') - expect(pvc.spec?.storageClassName).toBe(getRWXStorageClass()) - expect(pvc.spec?.volumeMode).toBe('Filesystem') + it('should verify RWX PVC was created with correct access mode', async () => { + const pvc = await k8sApi.readNamespacedPersistentVolumeClaim({ + name: rwxPvcName, + namespace: 'default' }) - it('should verify RWX claim name is set correctly', () => { - expect(process.env.ACTIONS_RUNNER_CLAIM_NAME).toBe(rwxPvcName) - }) + expect(pvc.spec?.accessModes).toContain('ReadWriteMany') + expect(pvc.spec?.storageClassName).toBe(rwxStorageClassName) + expect(pvc.spec?.volumeMode).toBe('Filesystem') }) - if (!isRWXTestEnabled()) { - it(RWX_SKIP_MESSAGE, () => {}) - } + it('should verify RWX claim name is set correctly', () => { + expect(process.env.ACTIONS_RUNNER_CLAIM_NAME).toBe(rwxPvcName) + }) }) diff --git a/packages/k8s/tests/test-setup.ts b/packages/k8s/tests/test-setup.ts index 3ed6a604..28dbc550 100644 --- a/packages/k8s/tests/test-setup.ts +++ b/packages/k8s/tests/test-setup.ts @@ -243,65 +243,3 @@ export class TestHelper { return runContainerStep } } - -/** - * RWX Test Contract: - * - * Tests requiring ReadWriteMany (RWX) volumes MUST be gated by TWO environment variables: - * 1. ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true (explicit opt-in) - * 2. ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS= (storage class that supports RWX) - * - * If either variable is missing or ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX is not "true", - * the test MUST be skipped with the exact message defined in this contract. - * - * This contract ensures: - * - RWX tests do not fail on clusters without RWX provisioners - * - Test requirements are explicit and documented - * - RWO affinity tests remain independent and always runnable - * - Skip behavior is deterministic (no dynamic cluster probing) - * - * Usage example: - * ```typescript - * import { isRWXTestEnabled, getRWXStorageClass, RWX_SKIP_MESSAGE } from './test-setup' - * - * describe('RWX Test Suite', () => { - * const describeOrSkip = isRWXTestEnabled() ? describe : describe.skip - * - * describeOrSkip('RWX volume tests', () => { - * it('should test RWX functionality', async () => { - * const storageClass = getRWXStorageClass() - * // ... test code using storageClass - * }) - * }) - * - * if (!isRWXTestEnabled()) { - * it(RWX_SKIP_MESSAGE, () => {}) - * } - * }) - * ``` - */ - -/** - * Checks if RWX tests should run based on environment variables. - * @returns true if both ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true and ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS are set - */ -export function isRWXTestEnabled(): boolean { - const enabled = process.env.ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX === 'true' - const storageClass = process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS - return enabled && !!storageClass -} - -/** - * Gets the RWX storage class name from environment variable. - * @returns The storage class name, or undefined if not set - */ -export function getRWXStorageClass(): string | undefined { - return process.env.ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS -} - -/** - * Skip message constant - DO NOT MODIFY - * This exact message must be used when skipping RWX tests - */ -export const RWX_SKIP_MESSAGE = - 'RWX tests skipped: set ACTIONS_RUNNER_K8S_TEST_ENABLE_RWX=true and ACTIONS_RUNNER_K8S_TEST_RWX_STORAGE_CLASS' From e260d4260a89ac232aebff29b8ec30f1ceefd124 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Wed, 6 May 2026 19:18:20 +0200 Subject: [PATCH 08/11] Always lean on scheduler --- docs/adrs/0135-rwx-volume-strategy.md | 19 +++---- packages/k8s/README.md | 11 ++-- packages/k8s/src/k8s/index.ts | 54 +++++++++++++------- packages/k8s/src/k8s/utils.ts | 7 ++- packages/k8s/tests/rwo-affinity-test.ts | 67 +++++++++++++++---------- 5 files changed, 96 insertions(+), 62 deletions(-) diff --git a/docs/adrs/0135-rwx-volume-strategy.md b/docs/adrs/0135-rwx-volume-strategy.md index 9755fc4e..704c5a9d 100644 --- a/docs/adrs/0135-rwx-volume-strategy.md +++ b/docs/adrs/0135-rwx-volume-strategy.md @@ -21,16 +21,16 @@ For environments where RWX is unavailable or undesirable, we support a `ReadWrit ### Operational Guidance 1. **Preferred Model (RWX):** Operators should configure the runner with a PVC supporting `ReadWriteMany`. -2. **Fallback Model (RWO):** If using `ReadWriteOnce`, the Kubernetes scheduler integration is enabled by default. Operators can optionally disable it by setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` (not recommended). -3. **Node Selection:** By default, the hook applies a `requiredDuringSchedulingIgnoredDuringExecution` node affinity targeting the runner's current node (`kubernetes.io/hostname`). +2. **Fallback Model (RWO):** If using `ReadWriteOnce`, set `ACTIONS_RUNNER_HOOK_RWO=true` to enforce same-node scheduling. +3. **Node Selection:** By default, the hook applies a `preferredDuringSchedulingIgnoredDuringExecution` node affinity targeting the runner's current node (`kubernetes.io/hostname`). With `ACTIONS_RUNNER_HOOK_RWO=true`, this becomes `requiredDuringSchedulingIgnoredDuringExecution`. 4. **Implementation Details:** - The hook determines the node name via `getCurrentNodeName()` and applies affinity in `packages/k8s/src/k8s/index.ts` (lines 101, 165). - - The scheduler is enabled by default. Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` disables it, as defined in `packages/k8s/src/k8s/utils.ts` (line 16). + - `ACTIONS_RUNNER_HOOK_RWO=true` enables required same-node affinity, as defined in `packages/k8s/src/k8s/utils.ts`. - The PVC claim name defaults to `${ACTIONS_RUNNER_POD_NAME}-work` unless overridden by `ACTIONS_RUNNER_CLAIM_NAME` (`packages/k8s/src/hooks/constants.ts`, lines 27-33). ### Non-Recommendations -We explicitly do **not** recommend the use of `spec.nodeName` for operator-driven scheduling. While the hook uses `nodeName` as a legacy fallback when `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER` is set to `true` (`packages/k8s/src/k8s/index.ts`, lines 103, 167), this bypasses the Kubernetes scheduler and can lead to scheduling failures or resource imbalances. Operators should prefer the default affinity-based approach for RWO volumes. +We explicitly do **not** recommend the use of `spec.nodeName` for operator-driven scheduling. The hook relies on scheduler-based affinity (`preferred` by default, `required` when `ACTIONS_RUNNER_HOOK_RWO=true`) to keep scheduling decisions scheduler-aware and avoid hard node pinning. ## Alternatives @@ -40,14 +40,15 @@ We explicitly do **not** recommend the use of `spec.nodeName` for operator-drive ## Consequences - **Flexibility:** RWX users benefit from the ability to schedule job pods on any node in the cluster, maximizing resource utilization. -- **Node Coupling:** RWO users remain coupled to the node where the runner pod is running. The hook ensures job pods are scheduled on the same node via affinity (enabled by default) to maintain workspace integrity. -- **Configuration:** Operators using RWO can rely on the default affinity-based scheduling. Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` will fall back to legacy `nodeName` pinning (not recommended). RWX configurations do not require any special configuration for basic operation. +- **Node Coupling:** RWO users remain coupled to the node where the runner pod is running when `ACTIONS_RUNNER_HOOK_RWO=true`. The hook ensures job pods are scheduled on the same node via required affinity to maintain workspace integrity. +- **Configuration:** Default behavior is preferred same-node affinity. Operators using strict RWO semantics should set `ACTIONS_RUNNER_HOOK_RWO=true` for required same-node affinity. RWX configurations do not require any special configuration for basic operation. ## Migration Guidance -Operators migrating from an RWO setup that relied on legacy `nodeName` behavior can continue by setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true`, but should migrate to the default affinity-based scheduling for better scheduler integration: -1. Remove the `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER` environment variable to use the default affinity-based scheduling. -2. Verify that the runner's ServiceAccount has the necessary permissions to list pods (to determine its own node). +Operators migrating from an RWO setup that relied on legacy `nodeName` behavior should migrate to scheduler-based affinity: +1. Remove `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER` if present. +2. Set `ACTIONS_RUNNER_HOOK_RWO=true` to enforce required same-node affinity. +3. Verify that the runner's ServiceAccount has the necessary permissions to list pods (to determine its own node). ## Non-Goals diff --git a/packages/k8s/README.md b/packages/k8s/README.md index 86806823..4a95e3ee 100644 --- a/packages/k8s/README.md +++ b/packages/k8s/README.md @@ -30,7 +30,8 @@ rules: - The `ACTIONS_RUNNER_REQUIRE_JOB_CONTAINER` env should be set to true to prevent the runner from running any jobs outside of a container - The runner pod should map a persistent volume claim into the `_work` directory - The `ACTIONS_RUNNER_CLAIM_NAME` env should be set to the persistent volume claim that contains the runner's working directory, otherwise it defaults to `${ACTIONS_RUNNER_POD_NAME}-work` -- By default, the hook uses the Kubernetes scheduler with `nodeAffinity` to ensure job pods are scheduled correctly (essential for `ReadWriteOnce` volumes). Setting `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER=true` will fall back to legacy `nodeName` pinning (not recommended). +- By default, the hook uses the Kubernetes scheduler and sets a *preferred* `nodeAffinity` to the runner node (`kubernetes.io/hostname`) without requiring it. +- Setting `ACTIONS_RUNNER_HOOK_RWO=true` switches this to a *required* node affinity, ensuring scheduling on the same node as the runner pod (recommended for `ReadWriteOnce` volumes). ## Storage Guidance The K8s hooks require a shared volume between the runner pod and the job pods to share the workspace and other internal directories. @@ -41,16 +42,16 @@ The preferred way to configure storage is using a `ReadWriteMany` (RWX) Persiste To migrate from RWO to RWX: 1. Provision a new `ReadWriteMany` StorageClass if one is not available. 2. Update your PVC definition to use `accessModes: [ReadWriteMany]`. -3. No additional environment variables are needed - affinity-based scheduling is the default. +3. No additional environment variables are needed - preferred same-node affinity is the default. ### RWO Fallback (Affinity-based) If `ReadWriteMany` storage is not available, you can use `ReadWriteOnce` (RWO) storage. In this mode, all job pods must be scheduled on the same node as the runner pod that owns the PVC. To enable this safely: -1. The default affinity-based scheduling works automatically. Do not set `ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER`. -2. The hooks will automatically add a `nodeAffinity` to the job pods, ensuring they are scheduled on the same node as the runner pod (`kubernetes.io/hostname` match). +1. Set `ACTIONS_RUNNER_HOOK_RWO=true`. +2. The hooks will add a required `nodeAffinity` to job pods, ensuring they are scheduled on the same node as the runner pod (`kubernetes.io/hostname` match). -> **Note:** We do not recommend manually setting `nodeName` in the pod template, as the hooks handle node placement automatically via affinity when the scheduler is enabled. +> **Note:** We do not recommend manually setting `nodeName` in the pod template, as the hooks handle node placement automatically via affinity. - Some actions runner env's are expected to be set. These are set automatically by the runner. - `RUNNER_WORKSPACE` is expected to be set to the workspace of the runner - `GITHUB_WORKSPACE` is expected to be set to the workspace of the job diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ea1644ff..42c56808 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -14,7 +14,7 @@ import { PodPhase, mergePodSpecWithOptions, mergeObjectMeta, - useKubeScheduler, + useRwoRequiredAffinity, fixArgs } from './utils' @@ -96,11 +96,7 @@ export async function createPod( appPod.spec.restartPolicy = 'Never' const nodeName = await getCurrentNodeName() - if (useKubeScheduler()) { - appPod.spec.affinity = await getPodAffinity(nodeName) - } else { - appPod.spec.nodeName = nodeName - } + appPod.spec.affinity = await getPodAffinity(nodeName, useRwoRequiredAffinity()) const claimName = getVolumeClaimName() appPod.spec.volumes = [ { @@ -160,11 +156,10 @@ export async function createJob( job.spec.template.spec.restartPolicy = 'Never' const nodeName = await getCurrentNodeName() - if (useKubeScheduler()) { - job.spec.template.spec.affinity = await getPodAffinity(nodeName) - } else { - job.spec.template.spec.nodeName = nodeName - } + job.spec.template.spec.affinity = await getPodAffinity( + nodeName, + useRwoRequiredAffinity() + ) const claimName = getVolumeClaimName() job.spec.template.spec.volumes = [ @@ -564,14 +559,35 @@ async function getCurrentNodeName(): Promise { return nodeName } -async function getPodAffinity(nodeName: string): Promise { +async function getPodAffinity( + nodeName: string, + requiredDuringScheduling: boolean +): Promise { const affinity = new k8s.V1Affinity() affinity.nodeAffinity = new k8s.V1NodeAffinity() - affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution = - new k8s.V1NodeSelector() - affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms = - [ - { + + if (requiredDuringScheduling) { + affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution = + new k8s.V1NodeSelector() + affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms = + [ + { + matchExpressions: [ + { + key: 'kubernetes.io/hostname', + operator: 'In', + values: [nodeName] + } + ] + } + ] + return affinity + } + + affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution = [ + { + weight: 100, + preference: { matchExpressions: [ { key: 'kubernetes.io/hostname', @@ -580,7 +596,9 @@ async function getPodAffinity(nodeName: string): Promise { } ] } - ] + } + ] + return affinity } diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index 0129011c..d1e0823e 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -13,8 +13,7 @@ export const DEFAULT_CONTAINER_ENTRY_POINT_ARGS = [`-f`, `/dev/null`] export const DEFAULT_CONTAINER_ENTRY_POINT = 'tail' export const ENV_HOOK_TEMPLATE_PATH = 'ACTIONS_RUNNER_CONTAINER_HOOK_TEMPLATE' -export const ENV_DISABLE_KUBE_SCHEDULER = - 'ACTIONS_RUNNER_DISABLE_KUBE_SCHEDULER' +export const ENV_HOOK_RWO = 'ACTIONS_RUNNER_HOOK_RWO' export function containerVolumes( userMountVolumes: Mount[] = [], @@ -379,8 +378,8 @@ export function readExtensionFromFile(): k8s.V1PodTemplateSpec | undefined { return doc as k8s.V1PodTemplateSpec } -export function useKubeScheduler(): boolean { - return process.env[ENV_DISABLE_KUBE_SCHEDULER] !== 'true' +export function useRwoRequiredAffinity(): boolean { + return process.env[ENV_HOOK_RWO] === 'true' } export enum PodPhase { diff --git a/packages/k8s/tests/rwo-affinity-test.ts b/packages/k8s/tests/rwo-affinity-test.ts index 62dd1f2b..38c2c3cf 100644 --- a/packages/k8s/tests/rwo-affinity-test.ts +++ b/packages/k8s/tests/rwo-affinity-test.ts @@ -3,7 +3,7 @@ import { cleanupJob } from '../src/hooks' import { prepareJob } from '../src/hooks/prepare-job' import { TestHelper } from './test-setup' import { getPodByName } from '../src/k8s' -import { ENV_DISABLE_KUBE_SCHEDULER } from '../src/k8s/utils' +import { ENV_HOOK_RWO } from '../src/k8s/utils' jest.useRealTimers() @@ -11,7 +11,7 @@ let testHelper: TestHelper let prepareJobData: any let prepareJobOutputFilePath: string -describe('RWO Affinity Behavior (Scheduler Mode)', () => { +describe('RWO Affinity Behavior', () => { beforeEach(async () => { testHelper = new TestHelper() await testHelper.initialize() @@ -22,10 +22,10 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { afterEach(async () => { await cleanupJob() await testHelper.cleanup() - delete process.env[ENV_DISABLE_KUBE_SCHEDULER] + delete process.env[ENV_HOOK_RWO] }) - it('should add nodeAffinity with hostname selector by default', async () => { + it('should add preferred nodeAffinity with hostname selector by default', async () => { await prepareJob(prepareJobData.args, prepareJobOutputFilePath) const content = JSON.parse( @@ -39,17 +39,17 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { const nodeAffinity = pod.spec?.affinity?.nodeAffinity expect( - nodeAffinity?.requiredDuringSchedulingIgnoredDuringExecution + nodeAffinity?.preferredDuringSchedulingIgnoredDuringExecution ).toBeDefined() - const nodeSelectorTerms = - nodeAffinity?.requiredDuringSchedulingIgnoredDuringExecution - ?.nodeSelectorTerms + const preferred = + nodeAffinity?.preferredDuringSchedulingIgnoredDuringExecution - expect(nodeSelectorTerms).toBeDefined() - expect(nodeSelectorTerms?.length).toBeGreaterThan(0) + expect(preferred).toBeDefined() + expect(preferred?.length).toBeGreaterThan(0) + expect(preferred?.[0]?.weight).toBe(100) - const matchExpressions = nodeSelectorTerms?.[0].matchExpressions + const matchExpressions = preferred?.[0]?.preference?.matchExpressions expect(matchExpressions).toBeDefined() expect(matchExpressions?.length).toBeGreaterThan(0) @@ -62,8 +62,8 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { expect(hostnameExpression?.values?.[0]).toBeTruthy() }) - it('should NOT add nodeAffinity when scheduler mode is disabled', async () => { - process.env[ENV_DISABLE_KUBE_SCHEDULER] = 'true' + it('should add required nodeAffinity when ACTIONS_RUNNER_HOOK_RWO=true', async () => { + process.env[ENV_HOOK_RWO] = 'true' await prepareJob(prepareJobData.args, prepareJobOutputFilePath) @@ -73,14 +73,27 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { const pod = await getPodByName(content.state.jobPod) - if (pod.spec?.affinity) { - expect(pod.spec.affinity.nodeAffinity).toBeUndefined() - } + expect(pod.spec?.affinity).toBeDefined() + expect(pod.spec?.affinity?.nodeAffinity).toBeDefined() + expect( + pod.spec?.affinity?.nodeAffinity + ?.requiredDuringSchedulingIgnoredDuringExecution + ).toBeDefined() + expect( + pod.spec?.affinity?.nodeAffinity + ?.preferredDuringSchedulingIgnoredDuringExecution + ).toBeUndefined() + + const requiredValues = + pod.spec?.affinity?.nodeAffinity + ?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms?.[0] + ?.matchExpressions?.[0]?.values - expect(pod.spec?.nodeName).toBeDefined() + expect(requiredValues).toBeDefined() + expect(requiredValues?.length).toBeGreaterThan(0) }) - it('should fail assertion if affinity block is missing by default', async () => { + it('should not require node affinity by default', async () => { await prepareJob(prepareJobData.args, prepareJobOutputFilePath) const content = JSON.parse( @@ -94,22 +107,24 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { expect( pod.spec?.affinity?.nodeAffinity ?.requiredDuringSchedulingIgnoredDuringExecution - ).toBeDefined() + ).toBeUndefined() - const nodeSelectorTerms = + const preferred = pod.spec?.affinity?.nodeAffinity - ?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms + ?.preferredDuringSchedulingIgnoredDuringExecution - expect(nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.key).toBe( + expect(preferred?.[0]?.preference?.matchExpressions?.[0]?.key).toBe( 'kubernetes.io/hostname' ) - expect(nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.operator).toBe('In') + expect(preferred?.[0]?.preference?.matchExpressions?.[0]?.operator).toBe( + 'In' + ) expect( - nodeSelectorTerms?.[0]?.matchExpressions?.[0]?.values?.length + preferred?.[0]?.preference?.matchExpressions?.[0]?.values?.length ).toBeGreaterThan(0) }) - it('should use correct node name from runner pod in affinity values by default', async () => { + it('should use correct runner node name in preferred affinity values by default', async () => { const runnerPodName = process.env.ACTIONS_RUNNER_POD_NAME await prepareJob(prepareJobData.args, prepareJobOutputFilePath) @@ -124,7 +139,7 @@ describe('RWO Affinity Behavior (Scheduler Mode)', () => { const affinityValues = jobPod.spec?.affinity?.nodeAffinity - ?.requiredDuringSchedulingIgnoredDuringExecution?.nodeSelectorTerms?.[0] + ?.preferredDuringSchedulingIgnoredDuringExecution?.[0]?.preference ?.matchExpressions?.[0]?.values expect(affinityValues).toBeDefined() From 2f496d4636b13915a010e4518680625b4977de40 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Thu, 7 May 2026 13:19:11 +0200 Subject: [PATCH 09/11] fmt --- packages/k8s/src/k8s/index.ts | 5 ++++- packages/k8s/tests/rwx-volume-test.ts | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 42c56808..f0294ad5 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -96,7 +96,10 @@ export async function createPod( appPod.spec.restartPolicy = 'Never' const nodeName = await getCurrentNodeName() - appPod.spec.affinity = await getPodAffinity(nodeName, useRwoRequiredAffinity()) + appPod.spec.affinity = await getPodAffinity( + nodeName, + useRwoRequiredAffinity() + ) const claimName = getVolumeClaimName() appPod.spec.volumes = [ { diff --git a/packages/k8s/tests/rwx-volume-test.ts b/packages/k8s/tests/rwx-volume-test.ts index 97c06ece..50c637ba 100644 --- a/packages/k8s/tests/rwx-volume-test.ts +++ b/packages/k8s/tests/rwx-volume-test.ts @@ -83,7 +83,9 @@ describe('RWX Volume Tests', () => { gracePeriodSeconds: 0 }) .catch(() => undefined) - await k8sApi.deletePersistentVolume({ name: rwxPvName }).catch(() => undefined) + await k8sApi + .deletePersistentVolume({ name: rwxPvName }) + .catch(() => undefined) await k8sStorageApi .deleteStorageClass({ name: rwxStorageClassName }) .catch(() => undefined) From 188f6065a8fdcd8e4dd3e590276b3b7adeee8fb3 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Thu, 7 May 2026 14:37:31 +0200 Subject: [PATCH 10/11] wip --- packages/k8s/src/k8s/utils.ts | 57 -------------- packages/k8s/tests/k8s-utils-test.ts | 108 --------------------------- 2 files changed, 165 deletions(-) diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index d1e0823e..5d712067 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -113,28 +113,6 @@ export function containerVolumes( return mounts } -export function prepareJobScript(userVolumeMounts: Mount[]): { - containerPath: string - runnerPath: string -} { - let mountDirs = userVolumeMounts.map(m => m.targetVolumePath).join(' ') - - const content = `#!/bin/sh -l -set -e -cp -R /__w/_temp/_github_home /github/home -cp -R /__w/_temp/_github_workflow /github/workflow -mkdir -p ${mountDirs} -` - - const filename = `${uuidv4()}.sh` - const entryPointPath = `${process.env.RUNNER_TEMP}/${filename}` - fs.writeFileSync(entryPointPath, content) - return { - containerPath: `/__w/_temp/${filename}`, - runnerPath: entryPointPath - } -} - export function writeEntryPointScript( workingDirectory: string, entryPoint: string, @@ -167,41 +145,6 @@ exec ${environmentPrefix} ${entryPoint} ${ } } -export function writeRunScript( - workingDirectory: string, - entryPoint: string, - entryPointArgs?: string[], - prependPath?: string[], - environmentVariables?: { [key: string]: string } -): { containerPath: string; runnerPath: string } { - let exportPath = '' - if (prependPath?.length) { - // TODO: remove compatibility with typeof prependPath === 'string' as we bump to next major version, the hooks will lose PrependPath compat with runners 2.293.0 and older - const prepend = - typeof prependPath === 'string' ? prependPath : prependPath.join(':') - exportPath = `export PATH=${prepend}:$PATH` - } - - let environmentPrefix = scriptEnv(environmentVariables) - - const content = `#!/bin/sh -l -set -e -rm "$0" # remove script after running -${exportPath} -cd ${workingDirectory} && \ -exec ${environmentPrefix} ${entryPoint} ${ - entryPointArgs?.length ? entryPointArgs.join(' ') : '' - } -` - const filename = `${uuidv4()}.sh` - const entryPointPath = `${process.env.RUNNER_TEMP}/${filename}` - fs.writeFileSync(entryPointPath, content) - return { - containerPath: `/__w/_temp/${filename}`, - runnerPath: entryPointPath - } -} - export function writeContainerStepScript( dst: string, workingDirectory: string, diff --git a/packages/k8s/tests/k8s-utils-test.ts b/packages/k8s/tests/k8s-utils-test.ts index bfb6c453..bc99345e 100644 --- a/packages/k8s/tests/k8s-utils-test.ts +++ b/packages/k8s/tests/k8s-utils-test.ts @@ -2,7 +2,6 @@ import { containerPorts } from '../src/k8s' import { generateContainerName, - writeRunScript, mergePodSpecWithOptions, mergeContainerWithOptions, readExtensionFromFile, @@ -14,113 +13,6 @@ import { TestHelper } from './test-setup' let testHelper: TestHelper describe('k8s utils', () => { - describe('write entrypoint', () => { - beforeEach(async () => { - testHelper = new TestHelper() - await testHelper.initialize() - }) - - afterEach(async () => { - await testHelper.cleanup() - }) - - it('should not throw', () => { - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - SOME_ENV: 'SOME_VALUE' - }) - ).not.toThrow() - }) - - it('should throw if RUNNER_TEMP is not set', () => { - delete process.env.RUNNER_TEMP - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - SOME_ENV: 'SOME_VALUE' - }) - ).toThrow() - }) - - it('should throw if environment variable name contains double quote', () => { - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - 'SOME"_ENV': 'SOME_VALUE' - }) - ).toThrow() - }) - - it('should throw if environment variable name contains =', () => { - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - 'SOME=ENV': 'SOME_VALUE' - }) - ).toThrow() - }) - - it('should throw if environment variable name contains single quote', () => { - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - "SOME'_ENV": 'SOME_VALUE' - }) - ).toThrow() - }) - - it('should throw if environment variable name contains dollar', () => { - expect(() => - writeRunScript('/test', 'sh', ['-e', 'script.sh'], ['/prepend/path'], { - SOME_$_ENV: 'SOME_VALUE' - }) - ).toThrow() - }) - - it('should escape double quote, dollar and backslash in environment variable values', () => { - const { runnerPath } = writeRunScript( - '/test', - 'sh', - ['-e', 'script.sh'], - ['/prepend/path'], - { - DQUOTE: '"', - BACK_SLASH: '\\', - DOLLAR: '$' - } - ) - expect(fs.existsSync(runnerPath)).toBe(true) - const script = fs.readFileSync(runnerPath, 'utf8') - expect(script).toContain('"DQUOTE=\\"') - expect(script).toContain('"BACK_SLASH=\\\\"') - expect(script).toContain('"DOLLAR=\\$"') - }) - - it('should return object with containerPath and runnerPath', () => { - const { containerPath, runnerPath } = writeRunScript( - '/test', - 'sh', - ['-e', 'script.sh'], - ['/prepend/path'], - { - SOME_ENV: 'SOME_VALUE' - } - ) - expect(containerPath).toMatch(/\/__w\/_temp\/.*\.sh/) - const re = new RegExp(`${process.env.RUNNER_TEMP}/.*\\.sh`) - expect(runnerPath).toMatch(re) - }) - - it('should write entrypoint path and the file should exist', () => { - const { runnerPath } = writeRunScript( - '/test', - 'sh', - ['-e', 'script.sh'], - ['/prepend/path'], - { - SOME_ENV: 'SOME_VALUE' - } - ) - expect(fs.existsSync(runnerPath)).toBe(true) - }) - }) - describe('container volumes', () => { beforeEach(async () => { testHelper = new TestHelper() From 1e4e572a424b4e79b4f2c8ef24a95759629dfba0 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Sat, 9 May 2026 20:04:19 +0200 Subject: [PATCH 11/11] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/k8s/src/k8s/utils.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/k8s/src/k8s/utils.ts b/packages/k8s/src/k8s/utils.ts index 5d712067..7038f8c2 100644 --- a/packages/k8s/src/k8s/utils.ts +++ b/packages/k8s/src/k8s/utils.ts @@ -344,6 +344,9 @@ function mergeLists(base?: T[], from?: T[]): T[] { } export function fixArgs(args: string[]): string[] { + if (args.length >= 3 && args[0] === 'sh' && args[1] === '-c') { + return [args[0], args[1], args.slice(2).join(' ')] + } if (args.length >= 2 && args[0] === 'sh' && args[1] === '-c') { return args }