From cdb3e24bc9d61bb55c7b55137d93889ad9b391c6 Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Mon, 20 Apr 2026 11:29:01 -0700 Subject: [PATCH 1/3] Retry k8s endpoints --- packages/k8s/src/k8s/index.ts | 219 +++++++++++++++++++++++++++++----- 1 file changed, 189 insertions(+), 30 deletions(-) diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ae773da3..112aea1c 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -29,12 +29,121 @@ const kc = new k8s.KubeConfig() kc.loadFromDefault() -const k8sApi = kc.makeApiClient(k8s.CoreV1Api) -const k8sBatchV1Api = kc.makeApiClient(k8s.BatchV1Api) -const k8sAuthorizationV1Api = kc.makeApiClient(k8s.AuthorizationV1Api) - const DEFAULT_WAIT_FOR_POD_TIME_SECONDS = 10 * 60 // 10 min +const RETRYABLE_STATUS_CODES = new Set([408, 429, 500, 502, 503, 504]) +const RETRYABLE_NETWORK_CODES = new Set([ + 'ECONNREFUSED', + 'ECONNRESET', + 'ETIMEDOUT', + 'ENETUNREACH', + 'EAI_AGAIN', + 'ENOTFOUND' +]) +const MAX_RETRIES = 3 +const RETRY_BASE_DELAY_MS = 1000 + +function isRetryableError(err: unknown): boolean { + if (err instanceof k8s.ApiException) { + return RETRYABLE_STATUS_CODES.has(err.code) + } + + let current: unknown = err + while (current instanceof Error) { + if ( + 'code' in current && + typeof current.code === 'string' && + RETRYABLE_NETWORK_CODES.has(current.code) + ) { + return true + } + current = (current as { cause?: unknown }).cause + } + + return false +} + +function retryDelay(attempt: number): number { + return RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()) +} + +function retryAfterDelay(err: k8s.ApiException, attempt: number): number { + const headerRetrySeconds = err.headers?.['retry-after'] ?? err.headers?.['Retry-After'] + if (!headerRetrySeconds) { + return retryDelay(attempt) + } + + const seconds = Number(headerRetrySeconds) + if (!Number.isFinite(seconds) || seconds <= 0) { + return retryDelay(attempt) + } + + // Cap the delay to 30 seconds + const maxDelaySeconds = 30; + return Math.min(seconds * 1000, maxDelaySeconds * 1000) +} + +function describeError(err: unknown): string { + if (err instanceof k8s.ApiException) { + return `status ${err.code}` + } + let current: unknown = err + while (current instanceof Error) { + if ( + 'code' in current && + typeof current.code === 'string' && + RETRYABLE_NETWORK_CODES.has(current.code) + ) { + return current.code + } + current = (current as { cause?: unknown }).cause + } + return String(err) +} + +function withRetryClient(client: T): T { + const callWithRetry = async ( + fn: (...args: unknown[]) => unknown, + name: string, + args: unknown[] + ): Promise => { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + return await fn(...args) + } catch (err) { + if (!isRetryableError(err) || attempt === MAX_RETRIES) { + throw err + } + const delay = + err instanceof k8s.ApiException && err.code === 429 + ? retryAfterDelay(err, attempt) + : retryDelay(attempt) + core.warning( + `K8s API call ${name} failed (${describeError(err)}), retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${MAX_RETRIES})` + ) + await sleep(delay) + } + } + } + + return new Proxy(client, { + get(target, prop, receiver) { + const value = Reflect.get(target, prop, receiver) + if (typeof value !== 'function') { + return value + } + return async (...args: unknown[]) => + callWithRetry(value.bind(target), String(prop), args) + } + }) +} + +const k8sApi = withRetryClient(kc.makeApiClient(k8s.CoreV1Api)) +const k8sBatchV1Api = withRetryClient(kc.makeApiClient(k8s.BatchV1Api)) +const k8sAuthorizationV1Api = withRetryClient( + kc.makeApiClient(k8s.AuthorizationV1Api) +) + export const requiredPermissions = [ { group: '', @@ -176,10 +285,20 @@ export async function createJobPod( mergePodSpecWithOptions(appPod.spec, extension.spec) } - return await k8sApi.createNamespacedPod({ - namespace: namespace(), - body: appPod - }) + try { + return await k8sApi.createNamespacedPod({ + namespace: namespace(), + body: appPod + }) + } catch (err) { + if (err instanceof k8s.ApiException && err.code === 409) { + return await k8sApi.readNamespacedPod({ + name, + namespace: namespace() + }) + } + throw err + } } export async function createContainerStepPod( @@ -229,18 +348,35 @@ export async function createContainerStepPod( mergePodSpecWithOptions(appPod.spec, extension.spec) } - return await k8sApi.createNamespacedPod({ - namespace: namespace(), - body: appPod - }) + try { + return await k8sApi.createNamespacedPod({ + namespace: namespace(), + body: appPod + }) + } catch (err) { + if (err instanceof k8s.ApiException && err.code === 409) { + return await k8sApi.readNamespacedPod({ + name, + namespace: namespace() + }) + } + throw err + } } export async function deletePod(name: string): Promise { - await k8sApi.deleteNamespacedPod({ - name, - namespace: namespace(), - gracePeriodSeconds: 0 - }) + try { + await k8sApi.deleteNamespacedPod({ + name, + namespace: namespace(), + gracePeriodSeconds: 0 + }) + } catch (err) { + if (err instanceof k8s.ApiException && err.code === 404) { + return + } + throw err + } } export async function execPodStep( @@ -614,11 +750,20 @@ export async function createDockerSecret( 'base64' ) } - - return await k8sApi.createNamespacedSecret({ - namespace: namespace(), - body: secret - }) + try { + return await k8sApi.createNamespacedSecret({ + namespace: namespace(), + body: secret + }) + } catch (err) { + if (err instanceof k8s.ApiException && err.code === 409) { + return await k8sApi.readNamespacedSecret({ + name: secretName, + namespace: namespace() + }) + } + throw err + } } export async function createSecretForEnvs(envs: { @@ -642,18 +787,32 @@ export async function createSecretForEnvs(envs: { secret.data[key] = Buffer.from(value).toString('base64') } - await k8sApi.createNamespacedSecret({ - namespace: namespace(), - body: secret - }) + try { + await k8sApi.createNamespacedSecret({ + namespace: namespace(), + body: secret + }) + } catch (err) { + if (!(err instanceof k8s.ApiException && err.code === 409)) { + throw err + } + } + return secretName } export async function deleteSecret(name: string): Promise { - await k8sApi.deleteNamespacedSecret({ - name, - namespace: namespace() - }) + try { + await k8sApi.deleteNamespacedSecret({ + name, + namespace: namespace() + }) + } catch (err) { + if (err instanceof k8s.ApiException && err.code === 404) { + return + } + throw err + } } export async function pruneSecrets(): Promise { From 3119f84e5bd36b78f529edc337e38a3e0a3ed5b4 Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Mon, 20 Apr 2026 11:51:46 -0700 Subject: [PATCH 2/3] Add unit tests --- packages/k8s/src/k8s/index.ts | 4 +- packages/k8s/tests/k8s-retry-test.ts | 165 +++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 packages/k8s/tests/k8s-retry-test.ts diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 112aea1c..1de535a7 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -43,7 +43,7 @@ const RETRYABLE_NETWORK_CODES = new Set([ const MAX_RETRIES = 3 const RETRY_BASE_DELAY_MS = 1000 -function isRetryableError(err: unknown): boolean { +export function isRetryableError(err: unknown): boolean { if (err instanceof k8s.ApiException) { return RETRYABLE_STATUS_CODES.has(err.code) } @@ -67,7 +67,7 @@ function retryDelay(attempt: number): number { return RETRY_BASE_DELAY_MS * 2 ** attempt * (0.5 + Math.random()) } -function retryAfterDelay(err: k8s.ApiException, attempt: number): number { +export function retryAfterDelay(err: k8s.ApiException, attempt: number): number { const headerRetrySeconds = err.headers?.['retry-after'] ?? err.headers?.['Retry-After'] if (!headerRetrySeconds) { return retryDelay(attempt) diff --git a/packages/k8s/tests/k8s-retry-test.ts b/packages/k8s/tests/k8s-retry-test.ts new file mode 100644 index 00000000..ed99461f --- /dev/null +++ b/packages/k8s/tests/k8s-retry-test.ts @@ -0,0 +1,165 @@ +import * as k8s from '@kubernetes/client-node' +import { isRetryableError, retryAfterDelay } from '../src/k8s' + +function apiException( + code: number, + headers: { [key: string]: string } = {} +): k8s.ApiException { + return new k8s.ApiException(code, `status ${code}`, {}, headers) +} + +function networkError(code: string, cause?: unknown): Error { + const err = new Error(`network error ${code}`) as Error & { + code: string + cause?: unknown + } + err.code = code + if (cause !== undefined) { + err.cause = cause + } + return err +} + +describe('isRetryableError', () => { + it.each([408, 429, 500, 502, 503, 504])( + 'returns true for ApiException with status %i', + code => { + expect(isRetryableError(apiException(code))).toBe(true) + } + ) + + it.each([400, 401, 403, 404, 409, 422])( + 'returns false for ApiException with status %i', + code => { + expect(isRetryableError(apiException(code))).toBe(false) + } + ) + + it('returns false for ApiException even if cause chain has a retryable network code', () => { + // The ApiException branch does not descend into the cause chain — + // if the API responded 400, retrying is pointless regardless of + // what happened underneath. Documents current precedence. + const inner = networkError('ECONNRESET') + const err = apiException(400) + ;(err as unknown as { cause: unknown }).cause = inner + expect(isRetryableError(err)).toBe(false) + }) + + it.each([ + 'ECONNREFUSED', + 'ECONNRESET', + 'ETIMEDOUT', + 'ENETUNREACH', + 'EAI_AGAIN', + 'ENOTFOUND' + ])('returns true for plain Error with network code %s', code => { + expect(isRetryableError(networkError(code))).toBe(true) + }) + + it('returns false for plain Error with non-retryable code', () => { + expect(isRetryableError(networkError('EACCES'))).toBe(false) + }) + + it('returns true when a retryable code is nested in the cause chain', () => { + const root = networkError('ECONNRESET') + const middle = new Error('middle') as Error & { cause: unknown } + middle.cause = root + const outer = new Error('outer') as Error & { cause: unknown } + outer.cause = middle + expect(isRetryableError(outer)).toBe(true) + }) + + it('returns false for null', () => { + expect(isRetryableError(null)).toBe(false) + }) + + it('returns false for undefined', () => { + expect(isRetryableError(undefined)).toBe(false) + }) + + it('returns false for a string', () => { + expect(isRetryableError('ECONNRESET')).toBe(false) + }) + + it('returns false for Error with no code property', () => { + expect(isRetryableError(new Error('boom'))).toBe(false) + }) + + it('returns false for Error whose code is non-string', () => { + const err = new Error('boom') as Error & { code: number } + err.code = 42 + expect(isRetryableError(err)).toBe(false) + }) +}) + +describe('retryAfterDelay', () => { + it('falls back to exponential backoff when Retry-After header is missing', () => { + const delay = retryAfterDelay(apiException(429), 0) + // retryDelay(0) = 1000 * 1 * (0.5..1.5) = 500..1500 + expect(delay).toBeGreaterThanOrEqual(500) + expect(delay).toBeLessThanOrEqual(1500) + }) + + it('uses lowercase retry-after header value in seconds', () => { + const delay = retryAfterDelay( + apiException(429, { 'retry-after': '5' }), + 0 + ) + expect(delay).toBe(5000) + }) + + it('uses capitalized Retry-After header', () => { + const delay = retryAfterDelay( + apiException(429, { 'Retry-After': '7' }), + 0 + ) + expect(delay).toBe(7000) + }) + + it('caps the delay at 30 seconds', () => { + const delay = retryAfterDelay( + apiException(429, { 'retry-after': '600' }), + 0 + ) + expect(delay).toBe(30_000) + }) + + it('falls back when Retry-After value is not numeric (HTTP-date format)', () => { + // Per RFC 7231 the header may be an HTTP-date; we only handle seconds. + const delay = retryAfterDelay( + apiException(429, { 'retry-after': 'Wed, 21 Oct 2026 07:28:00 GMT' }), + 1 + ) + // retryDelay(1) = 2000 * (0.5..1.5) = 1000..3000 + expect(delay).toBeGreaterThanOrEqual(1000) + expect(delay).toBeLessThanOrEqual(3000) + }) + + it('falls back when Retry-After is zero', () => { + const delay = retryAfterDelay( + apiException(429, { 'retry-after': '0' }), + 2 + ) + // retryDelay(2) = 4000 * (0.5..1.5) = 2000..6000 + expect(delay).toBeGreaterThanOrEqual(2000) + expect(delay).toBeLessThanOrEqual(6000) + }) + + it('falls back when Retry-After is negative', () => { + const delay = retryAfterDelay( + apiException(429, { 'retry-after': '-5' }), + 0 + ) + expect(delay).toBeGreaterThanOrEqual(500) + expect(delay).toBeLessThanOrEqual(1500) + }) + + it('falls back when Retry-After is empty', () => { + const delay = retryAfterDelay( + apiException(429, { 'retry-after': '' }), + 0 + ) + expect(delay).toBeGreaterThanOrEqual(500) + expect(delay).toBeLessThanOrEqual(1500) + }) +}) From 34f9afa61c083d4031a7c395c2f7f0c88a0145de Mon Sep 17 00:00:00 2001 From: Amanda Murphy Date: Fri, 22 May 2026 13:43:05 -0700 Subject: [PATCH 3/3] Verify secret data on retry-induced 409s; add retry-wrapper tests --- .idea/.gitignore | 10 + .idea/codeStyles/Project.xml | 61 ++++ .idea/codeStyles/codeStyleConfig.xml | 5 + .idea/inspectionProfiles/Project_Default.xml | 6 + .idea/modules.xml | 8 + .idea/prettier.xml | 6 + .idea/runner-container-hooks.iml | 12 + .idea/vcs.xml | 6 + packages/k8s/src/k8s/index.ts | 76 ++++- packages/k8s/tests/k8s-retry-client-test.ts | 278 +++++++++++++++++++ 10 files changed, 459 insertions(+), 9 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/codeStyles/Project.xml create mode 100644 .idea/codeStyles/codeStyleConfig.xml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/prettier.xml create mode 100644 .idea/runner-container-hooks.iml create mode 100644 .idea/vcs.xml create mode 100644 packages/k8s/tests/k8s-retry-client-test.ts diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..30cf57ed --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,10 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Ignored default folder with query files +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml new file mode 100644 index 00000000..14ad7b18 --- /dev/null +++ b/.idea/codeStyles/Project.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 00000000..79ee123c --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 00000000..03d9549e --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..fb501f3e --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/prettier.xml b/.idea/prettier.xml new file mode 100644 index 00000000..b0c1c68f --- /dev/null +++ b/.idea/prettier.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/runner-container-hooks.iml b/.idea/runner-container-hooks.iml new file mode 100644 index 00000000..24643cc3 --- /dev/null +++ b/.idea/runner-container-hooks.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index 1de535a7..d847d460 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -79,7 +79,7 @@ export function retryAfterDelay(err: k8s.ApiException, attempt: number) } // Cap the delay to 30 seconds - const maxDelaySeconds = 30; + const maxDelaySeconds = 30 return Math.min(seconds * 1000, maxDelaySeconds * 1000) } @@ -101,7 +101,22 @@ function describeError(err: unknown): string { return String(err) } -function withRetryClient(client: T): T { +// WARNING: at-least-once delivery for writes. +// +// This proxy retries EVERY method on the wrapped client when isRetryableError +// returns true, including non-idempotent writes (createNamespaced*, +// patchNamespaced*, replaceNamespaced*). A 5xx from an intermediary or an +// ECONNRESET *after* the server has already committed a POST will cause a +// retry; on the next attempt the server returns 409 AlreadyExists, which then +// surfaces to the caller as a hard failure even though the original write +// succeeded. +// +// Every caller that POSTs/PUTs/PATCHes through this wrapped client MUST handle +// 409 explicitly (see createJobPod, createContainerStepPod, createDockerSecret, +// createSecretForEnvs for the existing call sites). If you add a new write +// caller and skip the 409 fallback, you will get spurious failures under +// transient network conditions. +export function withRetryClient(client: T): T { const callWithRetry = async ( fn: (...args: unknown[]) => unknown, name: string, @@ -119,7 +134,7 @@ function withRetryClient(client: T): T { ? retryAfterDelay(err, attempt) : retryDelay(attempt) core.warning( - `K8s API call ${name} failed (${describeError(err)}), retrying in ${Math.round(delay)}ms (attempt ${attempt + 1}/${MAX_RETRIES})` + `K8s API call ${name} failed (${describeError(err)}), retrying in ${Math.round(delay)}ms (retry ${attempt + 1} of ${MAX_RETRIES})` ) await sleep(delay) } @@ -756,13 +771,33 @@ export async function createDockerSecret( body: secret }) } catch (err) { - if (err instanceof k8s.ApiException && err.code === 409) { - return await k8sApi.readNamespacedSecret({ - name: secretName, - namespace: namespace() - }) + if (!(err instanceof k8s.ApiException && err.code === 409)) { + throw err } - throw err + // 409 here is almost certainly the retry-induced case: our POST committed + // server-side, an intermediary returned 5xx / ECONNRESET, withRetryClient + // retried, and the second attempt saw the secret we just created. Verify + // the existing secret's data matches what we tried to write before + // returning it — otherwise the pod will pull images with stale + // credentials. Secrets are immutable (set above), so we cannot + // replace/patch on mismatch; surface the collision instead. + const existing = await k8sApi.readNamespacedSecret({ + name: secretName, + namespace: namespace() + }) + const existingData = existing.data ?? {} + const desiredData = secret.data ?? {} + const desiredKeys = Object.keys(desiredData) + const existingKeys = Object.keys(existingData) + const mismatch = + desiredKeys.length !== existingKeys.length || + desiredKeys.some(k => existingData[k] !== desiredData[k]) + if (mismatch) { + throw new Error( + `docker secret ${secretName} already exists with data that does not match the requested registry credentials; refusing to use stale secret` + ) + } + return existing } } @@ -796,6 +831,29 @@ export async function createSecretForEnvs(envs: { if (!(err instanceof k8s.ApiException && err.code === 409)) { throw err } + // 409 here is almost certainly the retry-induced case: our POST committed + // server-side, an intermediary returned 5xx / ECONNRESET, withRetryClient + // retried, and the second attempt saw the secret we just created. Verify + // the existing secret's data matches what we tried to write before + // claiming success — otherwise the caller will mount stale env values. + // Secrets are immutable (set above), so we cannot replace/patch on + // mismatch; surface the collision instead. + const existing = await k8sApi.readNamespacedSecret({ + name: secretName, + namespace: namespace() + }) + const existingData = existing.data ?? {} + const desiredData = secret.data ?? {} + const desiredKeys = Object.keys(desiredData) + const existingKeys = Object.keys(existingData) + const mismatch = + desiredKeys.length !== existingKeys.length || + desiredKeys.some(k => existingData[k] !== desiredData[k]) + if (mismatch) { + throw new Error( + `secret ${secretName} already exists with data that does not match the requested envs; refusing to mount stale secret` + ) + } } return secretName diff --git a/packages/k8s/tests/k8s-retry-client-test.ts b/packages/k8s/tests/k8s-retry-client-test.ts new file mode 100644 index 00000000..a59d98d0 --- /dev/null +++ b/packages/k8s/tests/k8s-retry-client-test.ts @@ -0,0 +1,278 @@ +import * as k8s from '@kubernetes/client-node' + +// Silence retry warnings emitted by the wrapper. +jest.mock('@actions/core', () => ({ + warning: jest.fn(), + debug: jest.fn(), + info: jest.fn(), + error: jest.fn(), + setFailed: jest.fn(), + setOutput: jest.fn() +})) + +// Replace sleep so retry backoff doesn't actually wait, and we can assert +// what delay the wrapper requested. +jest.mock('../src/k8s/utils', () => ({ + ...jest.requireActual('../src/k8s/utils'), + sleep: jest.fn().mockResolvedValue(undefined) +})) + +// Replace KubeConfig so the module-level k8sApi/k8sBatchV1Api/k8sAuthorizationV1Api +// are wired to a fake API we control. Every real class (ApiException, V1Pod, +// V1Secret, etc.) is preserved via jest.requireActual so `instanceof` checks +// and constructors still work normally. +jest.mock('@kubernetes/client-node', () => { + const actual = jest.requireActual('@kubernetes/client-node') + const fakeApi: Record = { + createNamespacedPod: jest.fn(), + readNamespacedPod: jest.fn(), + deleteNamespacedPod: jest.fn(), + listNamespacedPod: jest.fn(), + createNamespacedSecret: jest.fn(), + readNamespacedSecret: jest.fn(), + deleteNamespacedSecret: jest.fn(), + listNamespacedSecret: jest.fn(), + readNamespacedJob: jest.fn(), + createSelfSubjectAccessReview: jest.fn() + } + class FakeKubeConfig { + loadFromDefault(): void {} + getContexts(): unknown[] { + return [{ namespace: 'test-ns', name: 'test', cluster: 'c', user: 'u' }] + } + makeApiClient(): typeof fakeApi { + return fakeApi + } + } + return { + ...actual, + __getFakeApi: () => fakeApi, + KubeConfig: FakeKubeConfig + } +}) + +import { sleep } from '../src/k8s/utils' +import { + createDockerSecret, + createSecretForEnvs, + deletePod, + deleteSecret, + withRetryClient +} from '../src/k8s' + +const fakeApi = (k8s as unknown as { + __getFakeApi: () => Record +}).__getFakeApi() +const mockedSleep = sleep as jest.Mock + +function apiException( + code: number, + headers: Record = {} +): k8s.ApiException { + return new k8s.ApiException(code, `status ${code}`, {}, headers) +} + +function networkError(code: string): Error { + const err = new Error(`network error ${code}`) as Error & { code: string } + err.code = code + return err +} + +beforeAll(() => { + process.env.ACTIONS_RUNNER_POD_NAME = 'test-runner' + process.env.ACTIONS_RUNNER_KUBERNETES_NAMESPACE = 'test-ns' +}) + +beforeEach(() => { + for (const fn of Object.values(fakeApi)) { + fn.mockReset() + } + mockedSleep.mockReset() + mockedSleep.mockResolvedValue(undefined) +}) + +describe('withRetryClient', () => { + it('returns value on first attempt without sleeping', async () => { + const fn = jest.fn().mockResolvedValue('ok') + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call('arg')).resolves.toBe('ok') + expect(fn).toHaveBeenCalledTimes(1) + expect(fn).toHaveBeenCalledWith('arg') + expect(mockedSleep).not.toHaveBeenCalled() + }) + + it('retries on retryable network error and eventually succeeds', async () => { + const fn = jest + .fn() + .mockRejectedValueOnce(networkError('ECONNRESET')) + .mockRejectedValueOnce(networkError('ETIMEDOUT')) + .mockResolvedValueOnce('ok') + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).resolves.toBe('ok') + expect(fn).toHaveBeenCalledTimes(3) + expect(mockedSleep).toHaveBeenCalledTimes(2) + }) + + it('gives up after MAX_RETRIES and throws the final error', async () => { + const err = apiException(500) + const fn = jest.fn().mockRejectedValue(err) + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).rejects.toBe(err) + // 1 initial attempt + 3 retries = 4 calls, with sleep between each pair + // of consecutive attempts but not after the final failure. + expect(fn).toHaveBeenCalledTimes(4) + expect(mockedSleep).toHaveBeenCalledTimes(3) + }) + + it('does not retry non-retryable ApiException (e.g. 409)', async () => { + const err = apiException(409) + const fn = jest.fn().mockRejectedValue(err) + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).rejects.toBe(err) + expect(fn).toHaveBeenCalledTimes(1) + expect(mockedSleep).not.toHaveBeenCalled() + }) + + it('does not retry plain Error with no retryable code', async () => { + const err = new Error('boom') + const fn = jest.fn().mockRejectedValue(err) + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).rejects.toBe(err) + expect(fn).toHaveBeenCalledTimes(1) + }) + + it('uses Retry-After header value for 429', async () => { + const fn = jest + .fn() + .mockRejectedValueOnce(apiException(429, { 'retry-after': '7' })) + .mockResolvedValueOnce('ok') + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).resolves.toBe('ok') + expect(mockedSleep).toHaveBeenCalledWith(7000) + }) + + it('uses exponential backoff for non-429 retryable errors', async () => { + const fn = jest + .fn() + .mockRejectedValueOnce(apiException(500)) + .mockResolvedValueOnce('ok') + const wrapped = withRetryClient({ call: fn }) as { call: typeof fn } + await expect(wrapped.call()).resolves.toBe('ok') + // retryDelay(0) = 1000 * 2^0 * (0.5..1.5) = 500..1500 + const delay = mockedSleep.mock.calls[0][0] + expect(delay).toBeGreaterThanOrEqual(500) + expect(delay).toBeLessThanOrEqual(1500) + }) + + it('passes non-function properties through untouched', () => { + const target = { name: 'k8sApi', count: 42, fn: jest.fn() } + const wrapped = withRetryClient(target) as typeof target + expect(wrapped.name).toBe('k8sApi') + expect(wrapped.count).toBe(42) + expect(typeof wrapped.fn).toBe('function') + }) +}) + +describe('idempotent write fallbacks', () => { + describe('createSecretForEnvs', () => { + it('returns the secret name when create succeeds', async () => { + fakeApi.createNamespacedSecret.mockResolvedValue({}) + const name = await createSecretForEnvs({ FOO: 'bar' }) + expect(name).toMatch(/-secret-/) + expect(fakeApi.createNamespacedSecret).toHaveBeenCalledTimes(1) + expect(fakeApi.readNamespacedSecret).not.toHaveBeenCalled() + }) + + it('returns the secret name on 409 when existing data matches', async () => { + const envs = { FOO: 'bar', BAZ: 'qux' } + const expectedData: Record = {} + for (const [k, v] of Object.entries(envs)) { + expectedData[k] = Buffer.from(v).toString('base64') + } + fakeApi.createNamespacedSecret.mockRejectedValue(apiException(409)) + fakeApi.readNamespacedSecret.mockResolvedValue({ data: expectedData }) + + const name = await createSecretForEnvs(envs) + expect(name).toMatch(/-secret-/) + expect(fakeApi.readNamespacedSecret).toHaveBeenCalledTimes(1) + }) + + it('throws on 409 when existing data differs', async () => { + fakeApi.createNamespacedSecret.mockRejectedValue(apiException(409)) + fakeApi.readNamespacedSecret.mockResolvedValue({ + data: { FOO: Buffer.from('stale').toString('base64') } + }) + await expect(createSecretForEnvs({ FOO: 'bar' })).rejects.toThrow( + /does not match the requested envs/ + ) + }) + + it('rethrows non-409 errors without reading', async () => { + fakeApi.createNamespacedSecret.mockRejectedValue(apiException(403)) + await expect( + createSecretForEnvs({ FOO: 'bar' }) + ).rejects.toBeInstanceOf(k8s.ApiException) + expect(fakeApi.readNamespacedSecret).not.toHaveBeenCalled() + }) + }) + + describe('createDockerSecret', () => { + const registry = { + serverUrl: 'https://reg.example.com', + username: 'u', + password: 'p' + } + + it('returns existing secret on 409 when data matches', async () => { + // Echo back whatever the function tried to write so the comparison + // passes — this exercises the matching branch without re-deriving the + // function's dockerconfigjson encoding here. + let attempted: { data?: Record } = {} + fakeApi.createNamespacedSecret.mockImplementation(async (req: { + body: { data?: Record } + }) => { + attempted = req.body + throw apiException(409) + }) + fakeApi.readNamespacedSecret.mockImplementation(async () => ({ + data: attempted.data + })) + + const result = await createDockerSecret(registry) + expect(result).toBeDefined() + expect(fakeApi.readNamespacedSecret).toHaveBeenCalledTimes(1) + }) + + it('throws on 409 with mismatched credentials', async () => { + fakeApi.createNamespacedSecret.mockRejectedValue(apiException(409)) + fakeApi.readNamespacedSecret.mockResolvedValue({ + data: { '.dockerconfigjson': Buffer.from('stale').toString('base64') } + }) + await expect(createDockerSecret(registry)).rejects.toThrow( + /does not match the requested registry credentials/ + ) + }) + }) + + describe('delete handlers', () => { + it('deletePod swallows 404', async () => { + fakeApi.deleteNamespacedPod.mockRejectedValue(apiException(404)) + await expect(deletePod('p')).resolves.toBeUndefined() + }) + + it('deletePod rethrows other errors', async () => { + fakeApi.deleteNamespacedPod.mockRejectedValue(apiException(403)) + await expect(deletePod('p')).rejects.toBeInstanceOf(k8s.ApiException) + }) + + it('deleteSecret swallows 404', async () => { + fakeApi.deleteNamespacedSecret.mockRejectedValue(apiException(404)) + await expect(deleteSecret('s')).resolves.toBeUndefined() + }) + + it('deleteSecret rethrows other errors', async () => { + fakeApi.deleteNamespacedSecret.mockRejectedValue(apiException(403)) + await expect(deleteSecret('s')).rejects.toBeInstanceOf(k8s.ApiException) + }) + }) +})