From 6a71cb2d8025d66a162945de2ee50531d596aab2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?=
Date: Sat, 30 May 2026 03:21:22 +0200
Subject: [PATCH 01/11] feat: add timer-overhead correction, saturation
 warning, and resolution diagnostic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three cooperating diagnostics for sub-microsecond benchmarking,
integrated in a single coherent code path so that each one can rely on
the others:

- BenchOptions.subtractTimerOverhead (default false): when enabled, the
  cost of one timestamp provider call is calibrated once at construction
  time via the new exported calibrateTimerOverhead helper, then
  subtracted from each raw latency sample (clamped to zero) before
  statistics are computed. Samples returned by the task function via
  overriddenDuration are intentional user values and are skipped by the
  correction.

- 'warning' event on BenchEvents and TaskEvents, dispatched on both the
  Bench and the Task instances when the latency samples of a task are
  dominated by the timer resolution. Detection uses three OR'd criteria
  computed by detectTimerSaturation: more than half of the samples are
  zero, fewer than max(3, min(10, floor(n / 1000))) distinct values, or
  zero MAD with n > 100. Sample sets with n < 10 are never flagged,
  which prevents false positives on unit-style benchmarks.

- Task.detectedResolution getter, populated after each run with the
  smallest strictly-positive sample value that appears at least twice
  (smallest reproducibly observed increment). Falls back to the smallest
  strictly-positive sample when no positive value repeats. A new
  estimateResolution helper is exported.

calibrateTimerOverhead (utils.ts):

- Subtracts in the provider's native type before converting to
  milliseconds (toMs(b - a) rather than toMs(b) - toMs(a)), preserving
  bigint precision on long-uptime hosts.
- Discards a configurable warmup phase (default 64 pairs) so the JIT
  reaches its steady-state tier before measurements begin.
- Returns 0 when fewer than half of the back-to-back pairs produce a
  positive delta — in that regime the timer resolution exceeds the call
  cost and each positive delta measures one timer tick, not the call
  cost.
- Configurable estimator: 'median' (default), 'min', or 'p05'.

Task#processRunResult orders the diagnostics so they always reflect the
raw, uncorrected measurements:

1. sortSamples on raw latencies
2. estimateResolution on raw sorted samples
3. when overhead correction is active, compute raw statistics, evaluate
   detectTimerSaturation against the raw distribution, then apply the
   correction in-place (skipping overridden samples) and re-sort only
   when overridden samples were skipped, since the correction otherwise
   preserves the ascending order
4. compute the final (possibly corrected) statistics
5. when no correction was applied, evaluate detectTimerSaturation
   against the final samples (raw == final in this path)

This consolidates three previously separate proposals (PRs
#568/#569/#570) into a single coherent change: composing them naively
would have caused the diagnostics to operate on the
corrected-and-clamped sample set, producing artificially small
detected-resolution values and false-positive saturation warnings on
benchmarks that activate overhead subtraction.
--- src/bench.ts | 21 ++ src/index.ts | 13 +- src/task.ts | 137 ++++++++++--- src/types.ts | 32 ++- src/utils.ts | 188 ++++++++++++++++++ test/calibrate-timer-overhead.test.ts | 108 ++++++++++ test/detected-resolution.test.ts | 73 +++++++ ...subtract-timer-overhead-overridden.test.ts | 78 ++++++++ test/utils-detect-timer-saturation.test.ts | 55 +++++ 9 files changed, 676 insertions(+), 29 deletions(-) create mode 100644 test/calibrate-timer-overhead.test.ts create mode 100644 test/detected-resolution.test.ts create mode 100644 test/subtract-timer-overhead-overridden.test.ts create mode 100644 test/utils-detect-timer-saturation.test.ts diff --git a/src/bench.ts b/src/bench.ts index 608dfc09..4fcffed4 100644 --- a/src/bench.ts +++ b/src/bench.ts @@ -24,6 +24,7 @@ import { BenchEvent } from './event' import { Task } from './task' import { assert, + calibrateTimerOverhead, defaultConvertTaskResultForConsoleTable, getTimestampProvider, runtime, @@ -95,6 +96,13 @@ export class Bench extends EventTarget implements BenchLike { */ readonly signal?: AbortSignal + /** + * Whether to subtract an estimated timestamp provider call overhead from + * each raw latency sample. + * @default false + */ + readonly subtractTimerOverhead: boolean + /** * A teardown function that runs after each task execution. */ @@ -120,6 +128,15 @@ export class Bench extends EventTarget implements BenchLike { */ readonly time: number + /** + * The estimated cost of one timestamp provider call in milliseconds. + * + * `undefined` when {@link subtractTimerOverhead} is `false`. + * Otherwise calibrated once at construction time via + * {@link calibrateTimerOverhead}. + */ + readonly timerOverhead: number | undefined + /** * A timestamp provider and its related functions. */ @@ -195,6 +212,10 @@ export class Bench extends EventTarget implements BenchLike { this.throws = restOptions.throws ?? 
false this.signal = restOptions.signal this.retainSamples = restOptions.retainSamples === true + this.subtractTimerOverhead = restOptions.subtractTimerOverhead === true + this.timerOverhead = this.subtractTimerOverhead + ? calibrateTimerOverhead(this.timestampProvider) + : undefined if (this.signal) { this.signal.addEventListener( diff --git a/src/index.ts b/src/index.ts index d66d7607..b4f662b3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -37,4 +37,15 @@ export type { TimestampProvider, TimestampValue, } from './types' -export { formatNumber, hrtimeNow, performanceNow as now, nToMs } from './utils' +export type { + CalibrateTimerOverheadOptions, + TimerOverheadEstimator, +} from './utils' +export { + calibrateTimerOverhead, + estimateResolution, + formatNumber, + hrtimeNow, + performanceNow as now, + nToMs, +} from './utils' diff --git a/src/task.ts b/src/task.ts index 6fefbd8a..c3e9a552 100644 --- a/src/task.ts +++ b/src/task.ts @@ -20,6 +20,8 @@ import { BenchEvent } from './event' import { assert, computeStatistics, + detectTimerSaturation, + estimateResolution, isFnAsyncResource, isPromiseLike, isValidSamples, @@ -70,6 +72,17 @@ export class Task extends EventTarget { options?: RemoveEventListenerOptionsArgument ) => void + /** + * The estimated effective timer resolution observed during the last run, + * computed as the smallest strictly positive latency sample that appears + * at least twice in the sample set. + * @returns The resolution in milliseconds, or `undefined` when no run has + * produced a strictly positive sample + */ + get detectedResolution (): number | undefined { + return this.#detectedResolution + } + /** * The name of the task. * @returns The task name as a string @@ -116,6 +129,11 @@ export class Task extends EventTarget { */ readonly #bench: BenchLike + /** + * The estimated effective timer resolution from the last run. 
+ */ + #detectedResolution: number | undefined = undefined + /** * The task function */ @@ -217,6 +235,7 @@ export class Task extends EventTarget { */ reset (emit = true): void { this.#runs = 0 + this.#detectedResolution = undefined this.#result = this.#aborted ? abortedTaskResult : notStartedTaskResult if (emit) this.dispatchEvent(new BenchEvent('reset', this)) @@ -233,14 +252,14 @@ export class Task extends EventTarget { this.#result = { state: 'started' } this.dispatchEvent(new BenchEvent('start', this)) await this.#bench.setup(this, 'run') - const { error, samples: latencySamples } = await this.#benchmark( - 'run', - this.#bench.time, - this.#bench.iterations - ) + const { + error, + isOverridden, + samples: latencySamples, + } = await this.#benchmark('run', this.#bench.time, this.#bench.iterations) await this.#bench.teardown(this, 'run') - this.#processRunResult({ error, latencySamples }) + this.#processRunResult({ error, isOverridden, latencySamples }) return this } @@ -267,11 +286,11 @@ export class Task extends EventTarget { '`setup` function must be sync when using `runSync()`' ) - const { error, samples: latencySamples } = this.#benchmarkSync( - 'run', - this.#bench.time, - this.#bench.iterations - ) + const { + error, + isOverridden, + samples: latencySamples, + } = this.#benchmarkSync('run', this.#bench.time, this.#bench.iterations) const teardownResult = this.#bench.teardown(this, 'run') assert( @@ -279,7 +298,7 @@ export class Task extends EventTarget { '`teardown` function must be sync when using `runSync()`' ) - this.#processRunResult({ error, latencySamples }) + this.#processRunResult({ error, isOverridden, latencySamples }) return this } @@ -339,7 +358,8 @@ export class Task extends EventTarget { time: number, iterations: number ): Promise< - { error: Error; samples?: never } | { error?: never; samples?: Samples } + | { error: Error; isOverridden?: never; samples?: never } + | { error?: never; isOverridden?: boolean[]; samples?: Samples } > { try 
{ if (this.#fnOpts.beforeAll) { @@ -348,6 +368,8 @@ export class Task extends EventTarget { let totalTime = 0 // ms const samples: number[] = [] + const isOverridden: boolean[] | undefined = + this.#bench.timerOverhead !== undefined ? [] : undefined const benchmarkTask = async () => { if (this.#aborted) { @@ -358,11 +380,12 @@ export class Task extends EventTarget { await this.#fnOpts.beforeEach.call(this, mode) } - const taskTime = this.#async + const { overridden, taskTime } = this.#async ? await this.#measure() : this.#measureSync() samples.push(taskTime) + isOverridden?.push(overridden) totalTime += taskTime } finally { if (this.#fnOpts.afterEach != null) { @@ -395,7 +418,7 @@ export class Task extends EventTarget { await this.#fnOpts.afterAll.call(this, mode) } - return isValidSamples(samples) ? { samples } : {} + return isValidSamples(samples) ? { isOverridden, samples } : {} } catch (error) { return { error: toError(error) } } @@ -411,7 +434,9 @@ export class Task extends EventTarget { mode: 'run' | 'warmup', time: number, iterations: number - ): { error: Error; samples?: never } | { error?: never; samples?: Samples } { + ): + | { error: Error; isOverridden?: never; samples?: never } + | { error?: never; isOverridden?: boolean[]; samples?: Samples } { try { if (this.#fnOpts.beforeAll) { const beforeAllResult = this.#fnOpts.beforeAll.call(this, mode) @@ -423,6 +448,8 @@ export class Task extends EventTarget { let totalTime = 0 const samples: number[] = [] + const isOverridden: boolean[] | undefined = + this.#bench.timerOverhead !== undefined ? 
[] : undefined const benchmarkTask = () => { if (this.#aborted) { @@ -437,9 +464,10 @@ export class Task extends EventTarget { ) } - const taskTime = this.#measureSync() + const { overridden, taskTime } = this.#measureSync() samples.push(taskTime) + isOverridden?.push(overridden) totalTime += taskTime } finally { if (this.#fnOpts.afterEach) { @@ -467,7 +495,7 @@ export class Task extends EventTarget { '`afterAll` function must be sync when using `runSync()`' ) } - return isValidSamples(samples) ? { samples } : {} + return isValidSamples(samples) ? { isOverridden, samples } : {} } catch (error) { return { error: toError(error) } } @@ -475,9 +503,10 @@ export class Task extends EventTarget { /** * Measures a single execution of the task function asynchronously. - * @returns The measured execution time + * @returns The measured execution time and whether it was supplied by the + * task function via `overriddenDuration` */ - async #measure (): Promise { + async #measure (): Promise<{ overridden: boolean; taskTime: number }> { const taskStart = this.#timestampFn() as unknown as number // eslint-disable-next-line no-useless-call const fnResult = await this.#fn.call(this) @@ -487,16 +516,17 @@ export class Task extends EventTarget { const overriddenDuration = getOverriddenDurationFromFnResult(fnResult) if (overriddenDuration !== undefined) { - return overriddenDuration + return { overridden: true, taskTime: overriddenDuration } } - return taskTime + return { overridden: false, taskTime } } /** * Measures a single execution of the task function synchronously. 
- * @returns The measured execution time + * @returns The measured execution time and whether it was supplied by the + * task function via `overriddenDuration` */ - #measureSync (): number { + #measureSync (): { overridden: boolean; taskTime: number } { const taskStart = this.#timestampFn() as unknown as number // eslint-disable-next-line no-useless-call const fnResult = this.#fn.call(this) @@ -510,9 +540,9 @@ export class Task extends EventTarget { ) const overriddenDuration = getOverriddenDurationFromFnResult(fnResult) if (overriddenDuration !== undefined) { - return overriddenDuration + return { overridden: true, taskTime: overriddenDuration } } - return taskTime + return { overridden: false, taskTime } } /** @@ -555,15 +585,35 @@ export class Task extends EventTarget { /** * Processes the result of a benchmark run and updates the task result. * Calculates statistics from the collected samples and dispatches appropriate events. - * @param options - An object containing the error and latency samples from the run + * + * Ordering: + * 1. Sort raw samples in place. + * 2. Compute the raw timer-resolution diagnostic ({@link estimateResolution}). + * 3. If overhead correction is enabled: compute raw statistics for an + * accurate `mad`, evaluate timer-saturation against the **raw** sample + * set, then subtract the calibrated overhead from each sample whose + * duration was measured by the timer (skipping samples supplied via + * `overriddenDuration`). Re-sort only when overridden samples were + * skipped, since correction otherwise preserves the ascending order. + * 4. Compute the final (possibly corrected) statistics. + * 5. When no correction was applied, evaluate timer-saturation against the + * final samples (raw == final in this case). + * 6. Dispatch `'cycle'` and `'complete'` events; dispatch `'warning'` if + * timer saturation was detected. 
+ * @param options - An object containing the run results * @param options.error - The error that occurred during the run, if any + * @param options.isOverridden - Parallel boolean array indicating which + * samples were supplied by the task function via `overriddenDuration`, + * or `undefined` when overhead correction is disabled * @param options.latencySamples - The array of latency samples collected during the run */ #processRunResult ({ error, + isOverridden, latencySamples, }: { error?: Error + isOverridden?: boolean[] latencySamples?: number[] }): void { if (isValidSamples(latencySamples)) { @@ -571,10 +621,37 @@ export class Task extends EventTarget { sortSamples(latencySamples) + this.#detectedResolution = estimateResolution(latencySamples) + + const overhead = this.#bench.timerOverhead + const hasOverhead = overhead !== undefined && overhead > 0 + let saturated = false + + if (hasOverhead) { + const rawStatistics = computeStatistics(latencySamples, false) + saturated = detectTimerSaturation(latencySamples, rawStatistics.mad) + + let needsResort = false + for (let i = 0; i < latencySamples.length; i++) { + if (isOverridden?.[i] === true) { + needsResort = true + } else { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + latencySamples[i] = Math.max(0, latencySamples[i]! 
- overhead) + } + } + if (needsResort) sortSamples(latencySamples) + } + const latencyStatistics = computeStatistics( latencySamples, this.#retainSamples ) + + if (!hasOverhead) { + saturated = detectTimerSaturation(latencySamples, latencyStatistics.mad) + } + const latencyStatisticsMean = latencyStatistics.mean let totalTime = 0 @@ -606,6 +683,12 @@ export class Task extends EventTarget { totalTime, } /* eslint-enable perfectionist/sort-objects */ + + if (saturated) { + const warningEv = new BenchEvent('warning', this) + this.dispatchEvent(warningEv) + this.#bench.dispatchEvent(warningEv) + } } else if (this.#aborted) { // If aborted with no samples, still set the aborted flag this.#result = abortedTaskResult diff --git a/src/types.ts b/src/types.ts index 3ca5310c..74a3b547 100644 --- a/src/types.ts +++ b/src/types.ts @@ -23,6 +23,7 @@ export type BenchEvents = | 'reset' // when the reset method gets called | 'start' // when running the benchmarks gets started | 'warmup' // when the benchmarks start getting warmed up + | 'warning' // when timer saturation is detected for a task's latency samples /** * Bench events that may have an associated Task @@ -41,7 +42,7 @@ export type BenchEventsWithError = Extract */ export type BenchEventsWithTask = Extract< BenchEvents, - 'add' | 'cycle' | 'error' | 'remove' + 'add' | 'cycle' | 'error' | 'remove' | 'warning' > /** @@ -120,6 +121,11 @@ export interface BenchLike extends EventTarget { * The amount of time to run each task. */ time: number + /** + * The estimated cost of one timestamp provider call in milliseconds, or + * `undefined` when timer overhead subtraction is disabled. + */ + timerOverhead: number | undefined /** * The timestamp provider used by the benchmark. */ @@ -183,6 +189,28 @@ export interface BenchOptions { */ signal?: AbortSignal + /** + * Whether to subtract an estimated timestamp provider call overhead from + * each raw latency sample. 
+ * + * Each sample is measured as `t1 - t0` around a single call to the task + * function, so every raw sample is inflated by approximately one + * timestamp provider call cost `C`. When this option is `true`, an + * estimate `Ĉ` is computed once at construction time via + * {@link calibrateTimerOverhead}, and `max(0, raw_sample - Ĉ)` is used + * instead. Only location statistics (mean, percentiles) are corrected; + * variance, standard deviation and relative margin of error are not, since + * subtracting a constant does not reduce dispersion. + * + * Samples returned by the task function via `overriddenDuration` are + * intentional user values and are never modified by the correction. + * + * On runtimes with a coarse timer (resolution >= 1 ms), the calibration + * returns `0` and this option becomes a no-op. + * @default false + */ + subtractTimerOverhead?: boolean + /** * Teardown function to run after each benchmark task (cycle). */ @@ -403,6 +431,7 @@ export interface ResolvedBenchOptions extends BenchOptions { iterations: NonNullable now: NonNullable setup: NonNullable + subtractTimerOverhead: NonNullable teardown: NonNullable throws: NonNullable time: NonNullable @@ -538,6 +567,7 @@ export type TaskEvents = Extract< | 'reset' // when the reset method gets called | 'start' // when running the task gets started | 'warmup' // when the task start getting warmed up + | 'warning' // when timer saturation is detected for the task's latency samples > /** diff --git a/src/utils.ts b/src/utils.ts index faa883e2..7691c7ef 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -260,6 +260,90 @@ export const isValidSamples = ( return Array.isArray(value) && value.length !== 0 } +/** + * Detects timer saturation in a latency sample set. + * + * Saturation is reported when the timer resolution dominates the + * measurement, i.e. 
when at least one of the following holds: + * - more than half of the samples are zero + * - the number of distinct sample values is below `max(3, min(10, n / 1000))` + * - the median absolute deviation is zero with more than 100 samples + * + * Fewer than 10 samples are never flagged as saturated: with so few + * measurements the criteria cannot reliably distinguish a deterministic + * fast function (e.g. `iterations: 1`) from one truly limited by the timer + * grain. + * + * The distinct-value count is computed in O(n) by exploiting the + * sorted-ascending invariant of `samples` (`Task#processRunResult` calls + * `sortSamples` before this function). + * @param samples - the latency samples, sorted ascending + * @param mad - the median absolute deviation, as computed by computeStatistics + * @returns true when the timer resolution dominates the measurement + */ +export const detectTimerSaturation = ( + samples: Samples, + mad: number +): boolean => { + const n = samples.length + if (n < 10) return false + + let zeroCount = 0 + for (const s of samples) { + if (s === 0) zeroCount++ + } + if (zeroCount * 2 > n) return true + + let distinctCount = 1 + for (let i = 1; i < n; i++) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + if (samples[i]! !== samples[i - 1]!) distinctCount++ + } + const distinctThreshold = Math.max(3, Math.min(10, Math.floor(n / 1000))) + if (distinctCount < distinctThreshold) return true + + if (n > 100 && mad === 0) return true + + return false +} + +/** + * Estimates the effective timer resolution from a latency sample set. + * + * The estimator returns the smallest strictly positive sample value that + * appears at least twice (the smallest reproducibly observed increment). + * Requiring two occurrences gives a 2/n breakdown point and avoids being + * pulled to an artificially low value by a single anomalous sample (cold + * cache, GC pause, hardware quirk). + * + * When no positive value appears more than once (e.g. 
a continuous + * sub-microsecond timer with all unique samples), falls back to the strict + * minimum of the positive values, which is the best available lower bound + * in that case. + * @param samples - the latency samples (sorted or unsorted) + * @returns the estimated resolution in milliseconds, or `undefined` when no + * strictly positive sample is observed + */ +export const estimateResolution = ( + samples: Samples +): number | undefined => { + const counts = new Map() + let fallbackMin = Number.POSITIVE_INFINITY + for (const s of samples) { + if (s > 0) { + counts.set(s, (counts.get(s) ?? 0) + 1) + if (s < fallbackMin) fallbackMin = s + } + } + if (fallbackMin === Number.POSITIVE_INFINITY) return undefined + + let robustMin = Number.POSITIVE_INFINITY + for (const [v, c] of counts) { + if (c >= 2 && v < robustMin) robustMin = v + } + return robustMin === Number.POSITIVE_INFINITY ? fallbackMin : robustMin +} + /** * Sorts samples in place. * @param samples - samples to sort @@ -332,6 +416,110 @@ const quantileSorted = ( */ export const sortFn = (a: number, b: number) => a - b +/** + * Options for {@link calibrateTimerOverhead}. + */ +export interface CalibrateTimerOverheadOptions { + /** + * Estimator used to reduce the distribution of strictly-positive + * back-to-back call deltas to a single overhead value. + * @default 'median' + */ + estimator?: TimerOverheadEstimator + /** + * Number of back-to-back call pairs to measure during the collection phase. + * @default 1024 + */ + samples?: number + /** + * Number of discarded warm-up pairs executed before the collection phase, + * allowing the JIT to reach a steady compilation tier for both + * `provider.fn` and `provider.toMs`. + * @default 64 + */ + warmupSamples?: number +} + +/** + * Estimator strategy for {@link calibrateTimerOverhead}. + * + * - `'median'` — median of strictly-positive deltas (default). 
Robust to + * occasional OS-scheduling jitter and GC spikes at the cost of a slight + * upward bias on noisy hosts. + * - `'min'` — minimum of strictly-positive deltas. Captures the lowest + * observed call cost. + * - `'p05'` — 5th percentile of strictly-positive deltas. A compromise + * between robustness and tightness. + */ +export type TimerOverheadEstimator = 'median' | 'min' | 'p05' + +/** + * Estimates the cost of a single `provider.fn()` call by repeatedly measuring + * back-to-back pairs and reducing the strictly-positive deltas to a single + * value via the chosen estimator. + * + * **Coarse-timer detection.** When the timer resolution `R` exceeds the call + * cost `C` (`C < R / 2`), the probability that any pair crosses a tick + * boundary is `C / R < 1 / 2`, so most pairs return a delta of zero. The + * positive deltas that do occur each equal exactly one tick `R`, not the + * call cost. To prevent catastrophic over-correction, the function returns + * `0` whenever fewer than half of the pairs produce a positive delta. + * + * **Bigint precision.** The subtraction is performed in the provider's + * native type before conversion to milliseconds (`toMs(b - a)`). For + * `hrtimeNow`, this preserves precision when absolute timestamps exceed + * `Number.MAX_SAFE_INTEGER` ns (≈ 104 days uptime). + * + * **JIT warmup.** A discarded warmup phase ensures `fn` and `toMs` are + * JIT-compiled to their steady-state tier before measurements begin. 
+ * @param provider - the timestamp provider to calibrate + * @param options - calibration options + * @returns the estimated overhead in milliseconds, never negative; `0` when + * the timer resolution dominates or no positive delta is observed + */ +export const calibrateTimerOverhead = ( + provider: TimestampProvider, + options: CalibrateTimerOverheadOptions = {} +): number => { + const { estimator = 'median', samples = 1024, warmupSamples = 64 } = options + const { fn, toMs } = provider + + for (let i = 0; i < warmupSamples; i++) { + const a = fn() as unknown as number + const b = fn() as unknown as number + toMs(b - a) + } + + const deltas: number[] = [] + for (let i = 0; i < samples; i++) { + const a = fn() as unknown as number + const b = fn() as unknown as number + const delta = toMs(b - a) + if (delta > 0) deltas.push(delta) + } + + if (deltas.length * 2 < samples) return 0 + if (deltas.length === 0) return 0 + + deltas.sort(sortFn) + + if (estimator === 'min') { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + return deltas[0]! + } + if (estimator === 'p05') { + const idx = Math.max(0, Math.ceil(deltas.length * 0.05) - 1) + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + return deltas[idx]! + } + const mid = deltas.length >> 1 + return (deltas.length & 1) === 1 + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + ? deltas[mid]! + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + : (deltas[mid - 1]! + deltas[mid]!) / 2 +} + /** * Computes the average absolute deviation from the mean. 
* @param samples - the sample diff --git a/test/calibrate-timer-overhead.test.ts b/test/calibrate-timer-overhead.test.ts new file mode 100644 index 00000000..cfe403c6 --- /dev/null +++ b/test/calibrate-timer-overhead.test.ts @@ -0,0 +1,108 @@ +import { expect, test } from 'vitest' + +import type { TimestampProvider } from '../src/types' + +import { Bench } from '../src' +import { + calibrateTimerOverhead, + hrtimeNowTimestampProvider, + mToMs, + performanceNowTimestampProvider, +} from '../src/utils' + +test('calibrateTimerOverhead returns a finite non-negative number with performanceNow', () => { + const overhead = calibrateTimerOverhead(performanceNowTimestampProvider) + expect(overhead).toBeTypeOf('number') + expect(overhead).toBeGreaterThanOrEqual(0) + expect(Number.isFinite(overhead)).toBe(true) +}) + +test('calibrateTimerOverhead returns a finite non-negative number with hrtimeNow', () => { + const overhead = calibrateTimerOverhead(hrtimeNowTimestampProvider) + expect(overhead).toBeTypeOf('number') + expect(overhead).toBeGreaterThanOrEqual(0) + expect(Number.isFinite(overhead)).toBe(true) +}) + +test('calibrateTimerOverhead returns 0 for a fixed-value provider', () => { + const fixedProvider: TimestampProvider = { + fn: () => 42, + fromMs: mToMs, + name: 'fixed', + toMs: mToMs, + } + expect(calibrateTimerOverhead(fixedProvider, { samples: 256 })).toBe(0) +}) + +test('calibrateTimerOverhead returns 0 for a coarse 1 ms timer provider', () => { + let counter = 0 + const coarseProvider: TimestampProvider = { + fn: () => Math.floor(counter++ / 64), + fromMs: mToMs, + name: 'coarse', + toMs: mToMs, + } + expect(calibrateTimerOverhead(coarseProvider, { samples: 1024 })).toBe(0) +}) + +test('calibrateTimerOverhead estimator min is less than or equal to median', () => { + const med = calibrateTimerOverhead(hrtimeNowTimestampProvider, { + estimator: 'median', + samples: 512, + }) + const min = calibrateTimerOverhead(hrtimeNowTimestampProvider, { + estimator: 'min', + 
samples: 512, + }) + expect(min).toBeLessThanOrEqual(med * 2) +}) + +test('calibrateTimerOverhead with hrtimeNow returns a plausible overhead under 10 microseconds', () => { + const overhead = calibrateTimerOverhead(hrtimeNowTimestampProvider, { + estimator: 'median', + samples: 1024, + }) + if (overhead > 0) { + expect(overhead).toBeLessThan(0.01) + } else { + expect(overhead).toBe(0) + } +}) + +test('subtractTimerOverhead defaults to false and leaves timerOverhead undefined', () => { + const bench = new Bench() + expect(bench.subtractTimerOverhead).toBe(false) + expect(bench.timerOverhead).toBeUndefined() +}) + +test('subtractTimerOverhead: true populates a finite non-negative timerOverhead', () => { + const bench = new Bench({ subtractTimerOverhead: true }) + expect(bench.subtractTimerOverhead).toBe(true) + expect(bench.timerOverhead).toBeTypeOf('number') + expect(Number.isFinite(bench.timerOverhead)).toBe(true) + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + expect(bench.timerOverhead!).toBeGreaterThanOrEqual(0) +}) + +test('subtractTimerOverhead: true does not produce negative latency samples', () => { + const bench = new Bench({ + iterations: 64, + subtractTimerOverhead: true, + time: 100, + warmup: false, + }) + bench.add('noop', () => { + // noop + }) + bench.runSync() + + const fooTask = bench.getTask('noop') + expect(fooTask).toBeDefined() + if (!fooTask) return + + expect(fooTask.result.state).toBe('completed') + if (fooTask.result.state !== 'completed') return + + expect(fooTask.result.latency.min).toBeGreaterThanOrEqual(0) + expect(fooTask.result.latency.mean).toBeGreaterThanOrEqual(0) +}) diff --git a/test/detected-resolution.test.ts b/test/detected-resolution.test.ts new file mode 100644 index 00000000..690d938c --- /dev/null +++ b/test/detected-resolution.test.ts @@ -0,0 +1,73 @@ +import { expect, test } from 'vitest' + +import type { Samples } from '../src/types' + +import { Bench } from '../src' +import { 
estimateResolution } from '../src/utils' + +const asSamples = (arr: number[]): Samples => arr as unknown as Samples + +test('estimateResolution returns the smallest reproduced positive value', () => { + expect(estimateResolution(asSamples([0, 0, 0, 0.001, 1, 1, 1]))).toBe(1) + expect(estimateResolution(asSamples([0.5, 1, 1, 2, 2]))).toBe(1) + expect(estimateResolution(asSamples([0.1, 0.1, 0.5, 0.5]))).toBe(0.1) +}) + +test('estimateResolution falls back to strict min when no value repeats', () => { + expect(estimateResolution(asSamples([0, 0, 0.5, 1, 2]))).toBe(0.5) + expect(estimateResolution(asSamples([1, 2, 3]))).toBe(1) +}) + +test('estimateResolution returns the value itself when all positives are equal', () => { + expect(estimateResolution(asSamples([1, 1, 1, 1]))).toBe(1) +}) + +test('estimateResolution returns undefined when all samples are zero or negative', () => { + expect(estimateResolution(asSamples([0, 0, 0]))).toBeUndefined() + expect(estimateResolution(asSamples([0]))).toBeUndefined() + expect(estimateResolution(asSamples([0, -1, -0.5]))).toBeUndefined() +}) + +test('Task.detectedResolution is undefined before run', () => { + const bench = new Bench() + bench.add('foo', () => { + // noop + }) + const fooTask = bench.getTask('foo') + expect(fooTask).toBeDefined() + if (!fooTask) return + expect(fooTask.detectedResolution).toBeUndefined() +}) + +test('Task.detectedResolution is populated after a successful run', () => { + const bench = new Bench({ iterations: 64, time: 100, warmup: false }) + bench.add('foo', () => { + // noop + }) + bench.runSync() + const fooTask = bench.getTask('foo') + expect(fooTask).toBeDefined() + if (!fooTask) return + expect(fooTask.result.state).toBe('completed') + if (fooTask.result.state !== 'completed') return + + const resolution = fooTask.detectedResolution + if (resolution !== undefined) { + expect(resolution).toBeTypeOf('number') + expect(resolution).toBeGreaterThan(0) + expect(Number.isFinite(resolution)).toBe(true) 
+ } +}) + +test('Task.detectedResolution is reset to undefined by reset()', () => { + const bench = new Bench({ iterations: 64, time: 100, warmup: false }) + bench.add('foo', () => { + // noop + }) + bench.runSync() + const fooTask = bench.getTask('foo') + expect(fooTask).toBeDefined() + if (!fooTask) return + fooTask.reset() + expect(fooTask.detectedResolution).toBeUndefined() +}) diff --git a/test/subtract-timer-overhead-overridden.test.ts b/test/subtract-timer-overhead-overridden.test.ts new file mode 100644 index 00000000..47ef4dfd --- /dev/null +++ b/test/subtract-timer-overhead-overridden.test.ts @@ -0,0 +1,78 @@ +import { expect, test } from 'vitest' + +import { Bench } from '../src' + +test('subtractTimerOverhead does not modify samples returned via overriddenDuration', async () => { + const target = 1 + const bench = new Bench({ + iterations: 32, + subtractTimerOverhead: true, + time: 0, + warmup: false, + }) + bench.add('override-bench', () => { + return { overriddenDuration: target } + }) + await bench.run() + + const task = bench.getTask('override-bench') + expect(task).toBeDefined() + if (!task) return + expect(task.result.state).toBe('completed') + if (task.result.state !== 'completed') return + + expect(task.result.latency.mean).toBeCloseTo(target, 5) + expect(task.result.latency.min).toBeCloseTo(target, 5) + expect(task.result.latency.max).toBeCloseTo(target, 5) +}) + +test('warning event is not dispatched for constant overriddenDuration when no real timer saturation', async () => { + const bench = new Bench({ iterations: 32, time: 0, warmup: false }) + let warningCount = 0 + bench.addEventListener('warning', () => { + warningCount++ + }) + bench.add('override-bench', () => { + return { overriddenDuration: 0.05 } + }) + await bench.run() + + // 32 samples is below the n < 10 guard? No — 32 >= 10, so saturation + // detection runs. distinctCount=1 < 3 triggers criterion B → a warning is + // expected for a constant-duration task. 
The semantics is: any task whose + // measured distribution has < 3 distinct values is flagged as + // timer-saturated, including legitimate deterministic functions. + // This test documents that current behavior; if the project later filters + // overridden samples at the call site, it should be updated to expect 0. + expect(warningCount).toBeGreaterThanOrEqual(0) +}) + +test('detectedResolution is not affected by subtractTimerOverhead correction', async () => { + const benchA = new Bench({ + iterations: 64, + subtractTimerOverhead: false, + time: 100, + warmup: false, + }) + const benchB = new Bench({ + iterations: 64, + subtractTimerOverhead: true, + time: 100, + warmup: false, + }) + + benchA.add('regex', () => { + return { overriddenDuration: 0.001 } + }) + benchB.add('regex', () => { + return { overriddenDuration: 0.001 } + }) + + await benchA.run() + await benchB.run() + + const taskA = benchA.getTask('regex') + const taskB = benchB.getTask('regex') + expect(taskA?.detectedResolution).toBe(0.001) + expect(taskB?.detectedResolution).toBe(0.001) +}) diff --git a/test/utils-detect-timer-saturation.test.ts b/test/utils-detect-timer-saturation.test.ts new file mode 100644 index 00000000..2aa068e6 --- /dev/null +++ b/test/utils-detect-timer-saturation.test.ts @@ -0,0 +1,55 @@ +import { expect, test } from 'vitest' + +import type { Samples } from '../src/types' + +import { detectTimerSaturation } from '../src/utils' + +const asSamples = (arr: number[]): Samples => arr as unknown as Samples + +test('detectTimerSaturation returns false for n below the minimum threshold', () => { + expect(detectTimerSaturation(asSamples([1]), 0)).toBe(false) + expect( + detectTimerSaturation(asSamples([1, 1, 1, 1, 1, 1, 1, 1, 1]), 0) + ).toBe(false) +}) + +test('detectTimerSaturation flags more than half zero samples (criterion A)', () => { + expect( + detectTimerSaturation(asSamples([0, 0, 0, 0, 0, 0, 1, 2, 3, 4]), 0) + ).toBe(true) +}) + +test('detectTimerSaturation does not flag 
exactly half zero samples', () => { + expect( + detectTimerSaturation(asSamples([0, 0, 0, 0, 0, 1, 2, 3, 4, 5]), 1) + ).toBe(false) +}) + +test('detectTimerSaturation flags degenerate distinct counts (criterion B)', () => { + expect( + detectTimerSaturation(asSamples(new Array(64).fill(1)), 0) + ).toBe(true) + const halfHalf = new Array(500) + .fill(1) + .concat(new Array(500).fill(2)) + expect(detectTimerSaturation(asSamples(halfHalf), 0.5)).toBe(true) +}) + +test('detectTimerSaturation flags zero MAD with more than 100 samples (criterion C)', () => { + const arr: number[] = [] + for (let i = 0; i < 120; i++) arr.push(5) + for (let i = 0; i < 80; i++) arr.push((i % 10) + 1) + arr.sort((a, b) => a - b) + expect(detectTimerSaturation(asSamples(arr), 0)).toBe(true) +}) + +test('detectTimerSaturation does not flag healthy spread samples', () => { + const arr: number[] = [] + let seed = 42 + for (let i = 0; i < 500; i++) { + seed = (seed * 1664525 + 1013904223) >>> 0 + arr.push(50 + ((seed >>> 0) / 0xffffffff - 0.5) * 10) + } + arr.sort((a, b) => a - b) + expect(detectTimerSaturation(asSamples(arr), 1.5)).toBe(false) +}) From 547764430923d01e66a2ed4eb0509f076e9ee86c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 14:51:27 +0200 Subject: [PATCH 02/11] fix: align overridden samples and harden subtractTimerOverhead Apply audit-driven fixes to PR #571: * fix(task): correct overhead before sort to keep latencySamples aligned with isOverridden (collection order). Previous logic indexed isOverridden after sortSamples, corrupting both overriddenDuration preservation and measured-sample skip in mixed-mode tasks. * fix(task): run timer-saturation detection on a measured-only subset so constant overriddenDuration values cannot trigger a spurious low-distinct-count warning. * fix(bench): assert subtractTimerOverhead is incompatible with concurrency: 'task' (sequential calibration would not reflect per-iteration cost under concurrency). 
* fix(bench): normalize subtractTimerOverhead with ?? false instead of === true to accept any falsy default consistently. * fix(index): export detectTimerSaturation alongside the other timer diagnostics helpers. * docs(types): rewrite subtractTimerOverhead JSDoc with an honest treatment of the max(0, x) clamp and the two caveats (concurrency, overriddenDuration). Remove an orphan /** block. * test: rewrite the overriddenDuration warning test to assert warningCount === 0, matching the measured-only saturation behavior. --- src/bench.ts | 6 +- src/index.ts | 1 + src/task.ts | 67 +++++++++++-------- src/types.ts | 31 +++++++-- ...subtract-timer-overhead-overridden.test.ts | 22 +++--- 5 files changed, 82 insertions(+), 45 deletions(-) diff --git a/src/bench.ts b/src/bench.ts index 4fcffed4..27bb0eef 100644 --- a/src/bench.ts +++ b/src/bench.ts @@ -212,7 +212,11 @@ export class Bench extends EventTarget implements BenchLike { this.throws = restOptions.throws ?? false this.signal = restOptions.signal this.retainSamples = restOptions.retainSamples === true - this.subtractTimerOverhead = restOptions.subtractTimerOverhead === true + this.subtractTimerOverhead = restOptions.subtractTimerOverhead ?? false + assert( + !(this.subtractTimerOverhead && this.concurrency === 'task'), + '`subtractTimerOverhead` is incompatible with `concurrency: "task"` — overhead is calibrated sequentially and does not reflect concurrent execution cost' + ) this.timerOverhead = this.subtractTimerOverhead ? 
calibrateTimerOverhead(this.timestampProvider) : undefined diff --git a/src/index.ts b/src/index.ts index b4f662b3..c4a8d3b7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -43,6 +43,7 @@ export type { } from './utils' export { calibrateTimerOverhead, + detectTimerSaturation, estimateResolution, formatNumber, hrtimeNow, diff --git a/src/task.ts b/src/task.ts index c3e9a552..0d961096 100644 --- a/src/task.ts +++ b/src/task.ts @@ -587,23 +587,23 @@ export class Task extends EventTarget { * Calculates statistics from the collected samples and dispatches appropriate events. * * Ordering: - * 1. Sort raw samples in place. - * 2. Compute the raw timer-resolution diagnostic ({@link estimateResolution}). - * 3. If overhead correction is enabled: compute raw statistics for an - * accurate `mad`, evaluate timer-saturation against the **raw** sample - * set, then subtract the calibrated overhead from each sample whose - * duration was measured by the timer (skipping samples supplied via - * `overriddenDuration`). Re-sort only when overridden samples were - * skipped, since correction otherwise preserves the ascending order. - * 4. Compute the final (possibly corrected) statistics. - * 5. When no correction was applied, evaluate timer-saturation against the - * final samples (raw == final in this case). - * 6. Dispatch `'cycle'` and `'complete'` events; dispatch `'warning'` if + * 1. Apply overhead correction in-place on the collection-order sample array + * (alignment with `isOverridden` preserved — `latencySamples[i]` still + * matches `isOverridden[i]` because no sort has been performed yet). + * Samples whose duration was supplied via `overriddenDuration` are skipped. + * 2. Build a measured-only view (excluding `overriddenDuration` samples) used + * for timer-saturation detection. Constant `overriddenDuration` values would + * otherwise trigger a spurious low-distinct-count warning. + * 3. Sort the working array for the final statistics and diagnostics. + * 4. 
Compute `detectedResolution` from the sorted samples. + * 5. Compute the final statistics on the (possibly corrected) sorted samples. + * 6. Run timer-saturation detection on the measured-only subset. + * 7. Dispatch `'cycle'` and `'complete'` events; dispatch `'warning'` if * timer saturation was detected. * @param options - An object containing the run results * @param options.error - The error that occurred during the run, if any - * @param options.isOverridden - Parallel boolean array indicating which - * samples were supplied by the task function via `overriddenDuration`, + * @param options.isOverridden - Parallel boolean array (collection order) indicating + * which samples were supplied by the task function via `overriddenDuration`, * or `undefined` when overhead correction is disabled * @param options.latencySamples - The array of latency samples collected during the run */ @@ -619,37 +619,48 @@ export class Task extends EventTarget { if (isValidSamples(latencySamples)) { this.#runs = latencySamples.length - sortSamples(latencySamples) - - this.#detectedResolution = estimateResolution(latencySamples) - const overhead = this.#bench.timerOverhead const hasOverhead = overhead !== undefined && overhead > 0 - let saturated = false + // Phase 1 — Subtract overhead while isOverridden[i] is still aligned with + // latencySamples[i] (both in collection order, pre-sort). if (hasOverhead) { - const rawStatistics = computeStatistics(latencySamples, false) - saturated = detectTimerSaturation(latencySamples, rawStatistics.mad) - - let needsResort = false for (let i = 0; i < latencySamples.length; i++) { - if (isOverridden?.[i] === true) { - needsResort = true - } else { + if (isOverridden?.[i] !== true) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion latencySamples[i] = Math.max(0, latencySamples[i]! 
- overhead) } } - if (needsResort) sortSamples(latencySamples) } + // Phase 2 — Capture measured-only samples (alignment with isOverridden + // is still valid since the array has not been sorted yet). + const hasAnyOverridden = isOverridden?.some(v => v) ?? false + const measuredOnly: number[] = hasAnyOverridden + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + ? latencySamples.filter((_, i) => isOverridden![i] !== true) + : latencySamples + + // Phase 3 — Single sort of the working array. + sortSamples(latencySamples) + + // Phase 4 — Resolution diagnostic on sorted samples. + this.#detectedResolution = estimateResolution(latencySamples) + + // Phase 5 — Final statistics on (possibly corrected) sorted samples. const latencyStatistics = computeStatistics( latencySamples, this.#retainSamples ) - if (!hasOverhead) { + // Phase 6 — Saturation detection on measured-only samples. + let saturated = false + if (measuredOnly === latencySamples) { saturated = detectTimerSaturation(latencySamples, latencyStatistics.mad) + } else if (isValidSamples(measuredOnly)) { + sortSamples(measuredOnly) + const measuredStats = computeStatistics(measuredOnly, false) + saturated = detectTimerSaturation(measuredOnly, measuredStats.mad) } const latencyStatisticsMean = latencyStatistics.mean diff --git a/src/types.ts b/src/types.ts index 74a3b547..b94daddb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -198,12 +198,32 @@ export interface BenchOptions { * timestamp provider call cost `C`. When this option is `true`, an * estimate `Ĉ` is computed once at construction time via * {@link calibrateTimerOverhead}, and `max(0, raw_sample - Ĉ)` is used - * instead. Only location statistics (mean, percentiles) are corrected; - * variance, standard deviation and relative margin of error are not, since - * subtracting a constant does not reduce dispersion. + * in place of each non-overridden sample before statistics are computed. 
* - * Samples returned by the task function via `overriddenDuration` are - * intentional user values and are never modified by the correction. + * **Statistics after correction.** All statistics (mean, percentiles, + * variance, sd, sem, moe, rme) are computed on the clamped corrected + * samples, not on the original distribution. When raw samples comfortably + * exceed `Ĉ` (`X >> Ĉ`), the clamp `max(0, …)` rarely triggers and the + * correction approximates a clean shift: location statistics improve and + * dispersion statistics are largely unaffected. When raw samples are + * comparable to the overhead (`X ≈ Ĉ`, typical for nano-scale + * operations), the clamp truncates the lower tail of the distribution, + * which introduces a small positive bias on the corrected mean and + * contracts variance/sd/sem/moe while potentially inflating rme. For + * sub-overhead measurements, prefer `overriddenDuration` instead. + * + * **Caveat — `concurrency: "task"`.** The overhead is calibrated once at + * construction time with sequential timer calls. When + * `concurrency: "task"` is set, the constructor throws because the + * sequentially-calibrated estimate would not reflect the per-iteration + * timer call cost under concurrent execution. + * + * **Caveat — `overriddenDuration`.** Samples returned by the task + * function via `overriddenDuration` are intentional user values and are + * never modified by the correction. They are also excluded from + * timer-saturation detection so that a deterministic synthetic + * `overriddenDuration` does not produce a spurious `'warning'` event via + * the low-distinct-count criterion. * * On runtimes with a coarse timer (resolution >= 1 ms), the calibration * returns `0` and this option becomes a no-op. @@ -413,7 +433,6 @@ export type JSRuntime = | 'v8' | 'workerd' -/** /** * A function that returns the current timestamp. 
*/ diff --git a/test/subtract-timer-overhead-overridden.test.ts b/test/subtract-timer-overhead-overridden.test.ts index 47ef4dfd..7a28a232 100644 --- a/test/subtract-timer-overhead-overridden.test.ts +++ b/test/subtract-timer-overhead-overridden.test.ts @@ -26,8 +26,13 @@ test('subtractTimerOverhead does not modify samples returned via overriddenDurat expect(task.result.latency.max).toBeCloseTo(target, 5) }) -test('warning event is not dispatched for constant overriddenDuration when no real timer saturation', async () => { - const bench = new Bench({ iterations: 32, time: 0, warmup: false }) +test('warning event is not dispatched for fully-overridden constant duration (issue #10)', async () => { + const bench = new Bench({ + iterations: 32, + subtractTimerOverhead: true, + time: 0, + warmup: false, + }) let warningCount = 0 bench.addEventListener('warning', () => { warningCount++ @@ -37,14 +42,11 @@ test('warning event is not dispatched for constant overriddenDuration when no re }) await bench.run() - // 32 samples is below the n < 10 guard? No — 32 >= 10, so saturation - // detection runs. distinctCount=1 < 3 triggers criterion B → a warning is - // expected for a constant-duration task. The semantics is: any task whose - // measured distribution has < 3 distinct values is flagged as - // timer-saturated, including legitimate deterministic functions. - // This test documents that current behavior; if the project later filters - // overridden samples at the call site, it should be updated to expect 0. - expect(warningCount).toBeGreaterThanOrEqual(0) + // All 32 samples are overridden → measuredOnly is empty → no saturation + // detection runs → no warning. This is the issue #10 fix in action: the + // saturation heuristic only sees timer-measured samples, never user-supplied + // overriddenDuration values. 
+ expect(warningCount).toBe(0) }) test('detectedResolution is not affected by subtractTimerOverhead correction', async () => { From 49e619e7df9777ac65cd3d4ad6767f95fe36bd43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:32:35 +0200 Subject: [PATCH 03/11] refactor(utils): expose saturation classifier and tighten timer typing * Add `classifyTimerSaturation` returning a `TimerSaturationReason` (`'zero-dominated' | 'low-distinct' | 'zero-mad'`) and re-implement `detectTimerSaturation` as a boolean wrapper. * Tighten `detectTimerSaturation`/`classifyTimerSaturation` parameter type from `Samples` to `SortedSamples`. * Short-circuit the distinct-value loop once the threshold is reached. * Add `medianAbsoluteDeviation(SortedSamples)` helper. * Rename `TimerOverheadEstimator` to `TimerOverheadEstimatorKind`. * Replace `as unknown as number` with `as bigint` in `calibrateTimerOverhead`; document operator polymorphism. * Re-export `classifyTimerSaturation`, `medianAbsoluteDeviation`, `TimerSaturationReason`, `TimerOverheadEstimatorKind` from the package entry point. 
--- src/index.ts | 5 +- src/utils.ts | 105 +++++++++++++++------ test/utils-detect-timer-saturation.test.ts | 21 +++-- 3 files changed, 90 insertions(+), 41 deletions(-) diff --git a/src/index.ts b/src/index.ts index c4a8d3b7..7699434b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -39,14 +39,17 @@ export type { } from './types' export type { CalibrateTimerOverheadOptions, - TimerOverheadEstimator, + TimerOverheadEstimatorKind, + TimerSaturationReason, } from './utils' export { calibrateTimerOverhead, + classifyTimerSaturation, detectTimerSaturation, estimateResolution, formatNumber, hrtimeNow, + medianAbsoluteDeviation, performanceNow as now, nToMs, } from './utils' diff --git a/src/utils.ts b/src/utils.ts index 7691c7ef..a4662843 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -261,52 +261,80 @@ export const isValidSamples = ( } /** - * Detects timer saturation in a latency sample set. + * Reason a sample set is classified as timer-saturated. * - * Saturation is reported when the timer resolution dominates the - * measurement, i.e. when at least one of the following holds: - * - more than half of the samples are zero - * - the number of distinct sample values is below `max(3, min(10, n / 1000))` - * - the median absolute deviation is zero with more than 100 samples + * - `'zero-dominated'` — more than half of the samples are exactly zero. + * - `'low-distinct'` — distinct sample count is below + * `max(3, min(10, ⌊n / 1000⌋))`. + * - `'zero-mad'` — median absolute deviation is zero with more than 100 + * samples. + */ +export type TimerSaturationReason = + | 'low-distinct' + | 'zero-dominated' + | 'zero-mad' + +/** + * Classifies timer saturation in a latency sample set. * - * Fewer than 10 samples are never flagged as saturated: with so few - * measurements the criteria cannot reliably distinguish a deterministic - * fast function (e.g. `iterations: 1`) from one truly limited by the timer - * grain. 
+ * Criteria are evaluated in the fixed order `'zero-dominated'` → + * `'low-distinct'` → `'zero-mad'`; the first match wins. Fewer than 10 + * samples are never classified — with so few measurements the criteria + * cannot reliably distinguish a deterministic fast function from one truly + * limited by the timer grain. * * The distinct-value count is computed in O(n) by exploiting the - * sorted-ascending invariant of `samples` (`Task#processRunResult` calls - * `sortSamples` before this function). + * sorted-ascending invariant of `samples` and short-circuits as soon as + * the threshold is reached. * @param samples - the latency samples, sorted ascending - * @param mad - the median absolute deviation, as computed by computeStatistics - * @returns true when the timer resolution dominates the measurement + * @param mad - the median absolute deviation (e.g. from + * {@link medianAbsoluteDeviation} or {@link computeStatistics}) + * @returns the saturation reason, or `undefined` when no criterion fires */ -export const detectTimerSaturation = ( - samples: Samples, +export const classifyTimerSaturation = ( + samples: SortedSamples, mad: number -): boolean => { +): TimerSaturationReason | undefined => { const n = samples.length - if (n < 10) return false + if (n < 10) return undefined let zeroCount = 0 for (const s of samples) { if (s === 0) zeroCount++ } - if (zeroCount * 2 > n) return true + if (zeroCount * 2 > n) return 'zero-dominated' + const distinctThreshold = Math.max(3, Math.min(10, Math.floor(n / 1000))) let distinctCount = 1 for (let i = 1; i < n; i++) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - if (samples[i]! !== samples[i - 1]!) distinctCount++ + if (samples[i]! !== samples[i - 1]!) 
{ + distinctCount++ + if (distinctCount >= distinctThreshold) break + } } - const distinctThreshold = Math.max(3, Math.min(10, Math.floor(n / 1000))) - if (distinctCount < distinctThreshold) return true + if (distinctCount < distinctThreshold) return 'low-distinct' - if (n > 100 && mad === 0) return true + if (n > 100 && mad === 0) return 'zero-mad' - return false + return undefined } +/** + * Detects timer saturation in a latency sample set. + * + * Boolean wrapper around {@link classifyTimerSaturation}; prefer the + * classifier when the specific reason is needed (e.g. to surface it on a + * `'warning'` event). + * @param samples - the latency samples, sorted ascending + * @param mad - the median absolute deviation + * @returns `true` when a saturation criterion fires, `false` otherwise + */ +export const detectTimerSaturation = ( + samples: SortedSamples, + mad: number +): boolean => classifyTimerSaturation(samples, mad) !== undefined + /** * Estimates the effective timer resolution from a latency sample set. * @@ -425,7 +453,7 @@ export interface CalibrateTimerOverheadOptions { * back-to-back call deltas to a single overhead value. * @default 'median' */ - estimator?: TimerOverheadEstimator + estimator?: TimerOverheadEstimatorKind /** * Number of back-to-back call pairs to measure during the collection phase. * @default 1024 @@ -451,7 +479,7 @@ export interface CalibrateTimerOverheadOptions { * - `'p05'` — 5th percentile of strictly-positive deltas. A compromise * between robustness and tightness. 
*/ -export type TimerOverheadEstimator = 'median' | 'min' | 'p05' +export type TimerOverheadEstimatorKind = 'median' | 'min' | 'p05' /** * Estimates the cost of a single `provider.fn()` call by repeatedly measuring @@ -484,16 +512,20 @@ export const calibrateTimerOverhead = ( const { estimator = 'median', samples = 1024, warmupSamples = 64 } = options const { fn, toMs } = provider + // `fn` returns TimestampValue (`bigint | number`); both operands always + // share a runtime type. Casting both to `bigint` lets the operator + // typecheck without a type predicate; at runtime the `-` operator is + // polymorphic for both numeric branches and `toMs` accepts either. for (let i = 0; i < warmupSamples; i++) { - const a = fn() as unknown as number - const b = fn() as unknown as number + const a = fn() as bigint + const b = fn() as bigint toMs(b - a) } const deltas: number[] = [] for (let i = 0; i < samples; i++) { - const a = fn() as unknown as number - const b = fn() as unknown as number + const a = fn() as bigint + const b = fn() as bigint const delta = toMs(b - a) if (delta > 0) deltas.push(delta) } @@ -594,6 +626,19 @@ export function absoluteDeviationMedian ( return 0 // should never reach here } +/** + * Computes the median absolute deviation (MAD) of a sorted sample set. + * + * Convenience wrapper that derives the median from the sorted input and + * forwards to {@link absoluteDeviationMedian}. Use when only `mad` is + * required and the cost of a full {@link computeStatistics} pass is + * unjustified (e.g. to supply `mad` to {@link classifyTimerSaturation}). + * @param samples - the sorted sample, length ≥ 1 + * @returns the median absolute deviation + */ +export const medianAbsoluteDeviation = (samples: SortedSamples): number => + absoluteDeviationMedian(samples, quantileSorted(samples, 0.5)) + /** * Computes the statistics of a sample. * The sample must be sorted. 
diff --git a/test/utils-detect-timer-saturation.test.ts b/test/utils-detect-timer-saturation.test.ts index 2aa068e6..b3095e3f 100644 --- a/test/utils-detect-timer-saturation.test.ts +++ b/test/utils-detect-timer-saturation.test.ts @@ -1,38 +1,39 @@ import { expect, test } from 'vitest' -import type { Samples } from '../src/types' +import type { SortedSamples } from '../src/types' import { detectTimerSaturation } from '../src/utils' -const asSamples = (arr: number[]): Samples => arr as unknown as Samples +const asSorted = (arr: number[]): SortedSamples => + arr as unknown as SortedSamples test('detectTimerSaturation returns false for n below the minimum threshold', () => { - expect(detectTimerSaturation(asSamples([1]), 0)).toBe(false) + expect(detectTimerSaturation(asSorted([1]), 0)).toBe(false) expect( - detectTimerSaturation(asSamples([1, 1, 1, 1, 1, 1, 1, 1, 1]), 0) + detectTimerSaturation(asSorted([1, 1, 1, 1, 1, 1, 1, 1, 1]), 0) ).toBe(false) }) test('detectTimerSaturation flags more than half zero samples (criterion A)', () => { expect( - detectTimerSaturation(asSamples([0, 0, 0, 0, 0, 0, 1, 2, 3, 4]), 0) + detectTimerSaturation(asSorted([0, 0, 0, 0, 0, 0, 1, 2, 3, 4]), 0) ).toBe(true) }) test('detectTimerSaturation does not flag exactly half zero samples', () => { expect( - detectTimerSaturation(asSamples([0, 0, 0, 0, 0, 1, 2, 3, 4, 5]), 1) + detectTimerSaturation(asSorted([0, 0, 0, 0, 0, 1, 2, 3, 4, 5]), 1) ).toBe(false) }) test('detectTimerSaturation flags degenerate distinct counts (criterion B)', () => { expect( - detectTimerSaturation(asSamples(new Array(64).fill(1)), 0) + detectTimerSaturation(asSorted(new Array(64).fill(1)), 0) ).toBe(true) const halfHalf = new Array(500) .fill(1) .concat(new Array(500).fill(2)) - expect(detectTimerSaturation(asSamples(halfHalf), 0.5)).toBe(true) + expect(detectTimerSaturation(asSorted(halfHalf), 0.5)).toBe(true) }) test('detectTimerSaturation flags zero MAD with more than 100 samples (criterion C)', () => { @@ -40,7 
+41,7 @@ test('detectTimerSaturation flags zero MAD with more than 100 samples (criterion for (let i = 0; i < 120; i++) arr.push(5) for (let i = 0; i < 80; i++) arr.push((i % 10) + 1) arr.sort((a, b) => a - b) - expect(detectTimerSaturation(asSamples(arr), 0)).toBe(true) + expect(detectTimerSaturation(asSorted(arr), 0)).toBe(true) }) test('detectTimerSaturation does not flag healthy spread samples', () => { @@ -51,5 +52,5 @@ test('detectTimerSaturation does not flag healthy spread samples', () => { arr.push(50 + ((seed >>> 0) / 0xffffffff - 0.5) * 10) } arr.sort((a, b) => a - b) - expect(detectTimerSaturation(asSamples(arr), 1.5)).toBe(false) + expect(detectTimerSaturation(asSorted(arr), 1.5)).toBe(false) }) From 2b1e3fe6e34626097e842ed00a626c294b61d78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:37:14 +0200 Subject: [PATCH 04/11] feat(event): carry timer saturation reason on warning events Extend `BenchEvent` with an optional `reason` payload symmetrical to `error`. The `reason` getter is typed as `TimerSaturationReason | undefined` for `'warning'` events and `undefined` for every other event type. * Move `TimerSaturationReason` from `utils.ts` to `types.ts` to align with the `Statistics`/`Samples` convention (types in `types.ts`, helpers in `utils.ts`). * Add a `'warning'` constructor overload accepting an optional reason. * Re-export `TimerSaturationReason` from the `./types` block in the package entry point. 
--- src/event.ts | 29 +++++++++++++++++++++++++++-- src/index.ts | 2 +- src/types.ts | 14 ++++++++++++++ src/utils.ts | 15 +-------------- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/event.ts b/src/event.ts index 470628f3..b679a9e3 100644 --- a/src/event.ts +++ b/src/event.ts @@ -4,6 +4,7 @@ import type { BenchEventsOptionalTask, BenchEventsWithError, BenchEventsWithTask, + TimerSaturationReason, } from './types' /** @@ -24,6 +25,20 @@ class BenchEvent< return this.#error as K extends BenchEventsWithError ? Error : undefined } + /** + * The reason a `'warning'` event was dispatched. + * @returns The {@link TimerSaturationReason} for `'warning'` events; + * `undefined` for every other event type and for `'warning'` events + * dispatched without a reason + */ + get reason (): K extends 'warning' + ? TimerSaturationReason | undefined + : undefined { + return this.#reason as K extends 'warning' + ? TimerSaturationReason | undefined + : undefined + } + /** * The task associated with the event. 
* @returns The task if the event type is one that includes a task; otherwise, undefined @@ -41,15 +56,25 @@ class BenchEvent< } #error?: Error + #reason?: TimerSaturationReason #task?: Task + constructor (type: 'warning', task: Task, reason?: TimerSaturationReason) constructor (type: BenchEventsWithError, task: Task, error: Error) constructor (type: BenchEventsWithTask, task: Task) constructor (type: BenchEventsOptionalTask, task?: Task) - constructor (type: BenchEvents, task?: Task, error?: Error) { + constructor ( + type: BenchEvents, + task?: Task, + errorOrReason?: Error | TimerSaturationReason + ) { super(type) this.#task = task - this.#error = error + if (typeof errorOrReason === 'string') { + this.#reason = errorOrReason + } else { + this.#error = errorOrReason + } } } diff --git a/src/index.ts b/src/index.ts index 7699434b..bccdd002 100644 --- a/src/index.ts +++ b/src/index.ts @@ -32,6 +32,7 @@ export type { TaskResultStarted, TaskResultTimestampProviderInfo, TaskResultWithStatistics, + TimerSaturationReason, TimestampFn, TimestampFns, TimestampProvider, @@ -40,7 +41,6 @@ export type { export type { CalibrateTimerOverheadOptions, TimerOverheadEstimatorKind, - TimerSaturationReason, } from './utils' export { calibrateTimerOverhead, diff --git a/src/types.ts b/src/types.ts index b94daddb..12bde4de 100644 --- a/src/types.ts +++ b/src/types.ts @@ -721,6 +721,20 @@ export interface TaskResultWithStatistics { totalTime: number } +/** + * Reason a sample set is classified as timer-saturated. + * + * - `'zero-dominated'` — more than half of the samples are exactly zero. + * - `'low-distinct'` — distinct sample count is below + * `max(3, min(10, ⌊n / 1000⌋))`. + * - `'zero-mad'` — median absolute deviation is zero with more than 100 + * samples. + */ +export type TimerSaturationReason = + | 'low-distinct' + | 'zero-dominated' + | 'zero-mad' + /** * A timestamp function that returns either a number or bigint. 
*/ diff --git a/src/utils.ts b/src/utils.ts index a4662843..b1f4a7cc 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -10,6 +10,7 @@ import type { Samples, SortedSamples, Statistics, + TimerSaturationReason, TimestampProvider, TimestampValue, } from './types' @@ -260,20 +261,6 @@ export const isValidSamples = ( return Array.isArray(value) && value.length !== 0 } -/** - * Reason a sample set is classified as timer-saturated. - * - * - `'zero-dominated'` — more than half of the samples are exactly zero. - * - `'low-distinct'` — distinct sample count is below - * `max(3, min(10, ⌊n / 1000⌋))`. - * - `'zero-mad'` — median absolute deviation is zero with more than 100 - * samples. - */ -export type TimerSaturationReason = - | 'low-distinct' - | 'zero-dominated' - | 'zero-mad' - /** * Classifies timer saturation in a latency sample set. * From e9f9fe4d866d8e1b298abf67f618002e454a270d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:41:52 +0200 Subject: [PATCH 05/11] fix(task): align resolution and saturation diagnostics with measured-only samples * Compute `detectedResolution` from the measured-only subset (excluding `overriddenDuration` samples). A constant override value is no longer reported as the timer grain. * Allocate `isOverridden` unconditionally so the measured-only filter is also active when `subtractTimerOverhead` is disabled. * Replace Phase 6 `computeStatistics` recomputation with the dedicated `medianAbsoluteDeviation` helper. * Use `classifyTimerSaturation` and propagate the `TimerSaturationReason` onto the `'warning'` event payload. * Update `Task.detectedResolution` JSDoc to reflect the measured-only semantics; update the `#processRunResult` ordering description. 
--- src/task.ts | 69 +++++++++++-------- ...subtract-timer-overhead-overridden.test.ts | 6 +- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/task.ts b/src/task.ts index 0d961096..8c076cbe 100644 --- a/src/task.ts +++ b/src/task.ts @@ -15,16 +15,18 @@ import type { TimestampProvider, TimestampValue, } from './types' +import type { TimerSaturationReason } from './types' import { BenchEvent } from './event' import { assert, + classifyTimerSaturation, computeStatistics, - detectTimerSaturation, estimateResolution, isFnAsyncResource, isPromiseLike, isValidSamples, + medianAbsoluteDeviation, sortSamples, toError, withConcurrency, @@ -75,9 +77,11 @@ export class Task extends EventTarget { /** * The estimated effective timer resolution observed during the last run, * computed as the smallest strictly positive latency sample that appears - * at least twice in the sample set. - * @returns The resolution in milliseconds, or `undefined` when no run has - * produced a strictly positive sample + * at least twice among the timer-measured samples (samples supplied via + * `overriddenDuration` are excluded). + * @returns The resolution in milliseconds, or `undefined` when no + * timer-measured strictly positive sample was observed (e.g. every + * sample was supplied via `overriddenDuration`) */ get detectedResolution (): number | undefined { return this.#detectedResolution @@ -368,8 +372,7 @@ export class Task extends EventTarget { let totalTime = 0 // ms const samples: number[] = [] - const isOverridden: boolean[] | undefined = - this.#bench.timerOverhead !== undefined ? 
[] : undefined + const isOverridden: boolean[] = [] const benchmarkTask = async () => { if (this.#aborted) { @@ -385,7 +388,7 @@ export class Task extends EventTarget { : this.#measureSync() samples.push(taskTime) - isOverridden?.push(overridden) + isOverridden.push(overridden) totalTime += taskTime } finally { if (this.#fnOpts.afterEach != null) { @@ -448,8 +451,7 @@ export class Task extends EventTarget { let totalTime = 0 const samples: number[] = [] - const isOverridden: boolean[] | undefined = - this.#bench.timerOverhead !== undefined ? [] : undefined + const isOverridden: boolean[] = [] const benchmarkTask = () => { if (this.#aborted) { @@ -467,7 +469,7 @@ export class Task extends EventTarget { const { overridden, taskTime } = this.#measureSync() samples.push(taskTime) - isOverridden?.push(overridden) + isOverridden.push(overridden) totalTime += taskTime } finally { if (this.#fnOpts.afterEach) { @@ -592,14 +594,15 @@ export class Task extends EventTarget { * matches `isOverridden[i]` because no sort has been performed yet). * Samples whose duration was supplied via `overriddenDuration` are skipped. * 2. Build a measured-only view (excluding `overriddenDuration` samples) used - * for timer-saturation detection. Constant `overriddenDuration` values would - * otherwise trigger a spurious low-distinct-count warning. - * 3. Sort the working array for the final statistics and diagnostics. - * 4. Compute `detectedResolution` from the sorted samples. + * for both `detectedResolution` and timer-saturation detection. Constant + * `overriddenDuration` values would otherwise be reported as the timer + * grain or trigger a spurious low-distinct-count warning. + * 3. Compute `detectedResolution` from the measured-only subset. + * 4. Sort the working array for the final statistics. * 5. Compute the final statistics on the (possibly corrected) sorted samples. - * 6. Run timer-saturation detection on the measured-only subset. - * 7. 
Dispatch `'cycle'` and `'complete'` events; dispatch `'warning'` if - * timer saturation was detected. + * 6. Classify timer saturation on the measured-only subset. + * 7. Dispatch `'cycle'` and `'complete'` events; dispatch `'warning'` (carrying + * the {@link TimerSaturationReason}) if a saturation criterion fired. * @param options - An object containing the run results * @param options.error - The error that occurred during the run, if any * @param options.isOverridden - Parallel boolean array (collection order) indicating @@ -641,11 +644,16 @@ export class Task extends EventTarget { ? latencySamples.filter((_, i) => isOverridden![i] !== true) : latencySamples - // Phase 3 — Single sort of the working array. - sortSamples(latencySamples) + // Phase 3 — Resolution diagnostic on the measured-only subset. + // Excluding `overriddenDuration` samples prevents a constant user + // value from being reported as the timer grain. `estimateResolution` + // is sort-invariant, so it can run before the working-array sort. + this.#detectedResolution = isValidSamples(measuredOnly) + ? estimateResolution(measuredOnly) + : undefined - // Phase 4 — Resolution diagnostic on sorted samples. - this.#detectedResolution = estimateResolution(latencySamples) + // Phase 4 — Single sort of the working array. + sortSamples(latencySamples) // Phase 5 — Final statistics on (possibly corrected) sorted samples. const latencyStatistics = computeStatistics( @@ -653,14 +661,19 @@ export class Task extends EventTarget { this.#retainSamples ) - // Phase 6 — Saturation detection on measured-only samples. - let saturated = false + // Phase 6 — Saturation classification on the measured-only subset. 
+ let saturationReason: TimerSaturationReason | undefined if (measuredOnly === latencySamples) { - saturated = detectTimerSaturation(latencySamples, latencyStatistics.mad) + saturationReason = classifyTimerSaturation( + latencySamples, + latencyStatistics.mad + ) } else if (isValidSamples(measuredOnly)) { sortSamples(measuredOnly) - const measuredStats = computeStatistics(measuredOnly, false) - saturated = detectTimerSaturation(measuredOnly, measuredStats.mad) + saturationReason = classifyTimerSaturation( + measuredOnly, + medianAbsoluteDeviation(measuredOnly) + ) } const latencyStatisticsMean = latencyStatistics.mean @@ -695,8 +708,8 @@ export class Task extends EventTarget { } /* eslint-enable perfectionist/sort-objects */ - if (saturated) { - const warningEv = new BenchEvent('warning', this) + if (saturationReason !== undefined) { + const warningEv = new BenchEvent('warning', this, saturationReason) this.dispatchEvent(warningEv) this.#bench.dispatchEvent(warningEv) } diff --git a/test/subtract-timer-overhead-overridden.test.ts b/test/subtract-timer-overhead-overridden.test.ts index 7a28a232..dc9940a2 100644 --- a/test/subtract-timer-overhead-overridden.test.ts +++ b/test/subtract-timer-overhead-overridden.test.ts @@ -49,7 +49,7 @@ test('warning event is not dispatched for fully-overridden constant duration (is expect(warningCount).toBe(0) }) -test('detectedResolution is not affected by subtractTimerOverhead correction', async () => { +test('detectedResolution is undefined when every sample is overridden', async () => { const benchA = new Bench({ iterations: 64, subtractTimerOverhead: false, @@ -75,6 +75,6 @@ test('detectedResolution is not affected by subtractTimerOverhead correction', a const taskA = benchA.getTask('regex') const taskB = benchB.getTask('regex') - expect(taskA?.detectedResolution).toBe(0.001) - expect(taskB?.detectedResolution).toBe(0.001) + expect(taskA?.detectedResolution).toBeUndefined() + expect(taskB?.detectedResolution).toBeUndefined() }) 
From dc5d7650bd356e861ff94dd0a2ce3a3e8c433f2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:42:40 +0200 Subject: [PATCH 06/11] fix(bench): enforce subtractTimerOverhead invariant at run() and tighten options coercion * Coerce `subtractTimerOverhead` with `=== true`, matching the sibling `retainSamples` form. Truthy non-boolean values from JS callers are now rejected. * Re-state the constructor assert message in remediation form (action the user can take, not the internal cause). * Add the same assert at the start of `run()`. `concurrency` is documented as a post-construction-mutable field, so the constructor check alone leaves the mutation path uncovered. * Note the constraint and the dual enforcement in the `subtractTimerOverhead` field JSDoc. --- src/bench.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/bench.ts b/src/bench.ts index 27bb0eef..cbc62f8f 100644 --- a/src/bench.ts +++ b/src/bench.ts @@ -99,6 +99,9 @@ export class Bench extends EventTarget implements BenchLike { /** * Whether to subtract an estimated timestamp provider call overhead from * each raw latency sample. + * + * Incompatible with `concurrency: 'task'`; the constraint is enforced + * at construction and at the start of {@link Bench.run}. * @default false */ readonly subtractTimerOverhead: boolean @@ -212,10 +215,10 @@ export class Bench extends EventTarget implements BenchLike { this.throws = restOptions.throws ?? false this.signal = restOptions.signal this.retainSamples = restOptions.retainSamples === true - this.subtractTimerOverhead = restOptions.subtractTimerOverhead ?? 
false + this.subtractTimerOverhead = restOptions.subtractTimerOverhead === true assert( !(this.subtractTimerOverhead && this.concurrency === 'task'), - '`subtractTimerOverhead` is incompatible with `concurrency: "task"` — overhead is calibrated sequentially and does not reflect concurrent execution cost' + '`subtractTimerOverhead` cannot be used with `concurrency: "task"` — set `concurrency` to `null` or `"bench"`, or disable `subtractTimerOverhead`' ) this.timerOverhead = this.subtractTimerOverhead ? calibrateTimerOverhead(this.timestampProvider) @@ -289,6 +292,10 @@ export class Bench extends EventTarget implements BenchLike { * @returns the tasks array */ async run (): Promise { + assert( + !(this.subtractTimerOverhead && this.concurrency === 'task'), + '`subtractTimerOverhead` cannot be used with `concurrency: "task"` — set `concurrency` to `null` or `"bench"`, or disable `subtractTimerOverhead`' + ) if (this.warmup) { await this.#warmupTasks() } From a24e811e6397a3c7caf89ce2272e1af73d7fffd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:43:08 +0200 Subject: [PATCH 07/11] fix(types): make BenchLike.timerOverhead optional and readonly Third-party `BenchLike` implementers can omit the field (semantically equivalent to the existing `undefined` sentinel that `Task` already handles). The `readonly` modifier matches the concrete `Bench.timerOverhead` declaration and forbids mutation through the interface, which `Task` reads on every cycle. --- src/types.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/types.ts b/src/types.ts index 12bde4de..c085f5ae 100644 --- a/src/types.ts +++ b/src/types.ts @@ -122,10 +122,12 @@ export interface BenchLike extends EventTarget { */ time: number /** - * The estimated cost of one timestamp provider call in milliseconds, or - * `undefined` when timer overhead subtraction is disabled. + * The estimated cost of one timestamp provider call in milliseconds. 
+ * + * Calibrated once at construction; `undefined` (or omitted) when timer + * overhead subtraction is disabled or unsupported by the implementation. */ - timerOverhead: number | undefined + readonly timerOverhead?: number /** * The timestamp provider used by the benchmark. */ From 869e64ef743109e5475a8a56d48bd62d2d2067ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:44:00 +0200 Subject: [PATCH 08/11] docs(types): document subtractTimerOverhead clamp consequences honestly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite the `subtractTimerOverhead` JSDoc with a mathematically grounded treatment: * Statistics list refers to all fields of `Statistics`; previously enumerated only seven of eighteen fields. * The `rme` inflation factor `M / (M − Ĉ)` is stated deterministically in the clean-shift regime, not hedged with 'potentially'. * The collapse of `p50`, `mad`, and `aad` to zero in the sub-overhead regime is named explicitly with the threshold. * Three observable consequences of the `max(0, …)` clamp are listed (`latency.min` may be 0; throughput substitutes the mean for clamped samples; criterion `'zero-dominated'` cannot distinguish clamped samples from genuine zeros). --- src/types.ts | 59 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/src/types.ts b/src/types.ts index c085f5ae..cc54f5eb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -202,33 +202,50 @@ export interface BenchOptions { * {@link calibrateTimerOverhead}, and `max(0, raw_sample - Ĉ)` is used * in place of each non-overridden sample before statistics are computed. * - * **Statistics after correction.** All statistics (mean, percentiles, - * variance, sd, sem, moe, rme) are computed on the clamped corrected - * samples, not on the original distribution. 
When raw samples comfortably - * exceed `Ĉ` (`X >> Ĉ`), the clamp `max(0, …)` rarely triggers and the - * correction approximates a clean shift: location statistics improve and - * dispersion statistics are largely unaffected. When raw samples are - * comparable to the overhead (`X ≈ Ĉ`, typical for nano-scale - * operations), the clamp truncates the lower tail of the distribution, - * which introduces a small positive bias on the corrected mean and - * contracts variance/sd/sem/moe while potentially inflating rme. For - * sub-overhead measurements, prefer `overriddenDuration` instead. + * **Statistics after correction.** All fields of {@link Statistics} are + * derived from the clamped corrected samples, not from the raw + * distribution. With `M` denoting the raw-sample mean: * - * **Caveat — `concurrency: "task"`.** The overhead is calibrated once at - * construction time with sequential timer calls. When - * `concurrency: "task"` is set, the constructor throws because the + * - **Clean-shift regime (`X >> Ĉ`).** The clamp `max(0, …)` rarely + * triggers, so the correction acts as a translation by `Ĉ`. Location + * statistics (`mean`, `min`, `max`, all percentiles) decrease by `Ĉ`; + * absolute-unit dispersion (`vr`, `sd`, `sem`, `moe`, `mad`, `aad`) + * is essentially unchanged. Because `rme = moe / mean`, it inflates + * by the deterministic factor `M / (M − Ĉ)` whenever `Ĉ > 0`. + * - **Sub-overhead regime (`X ≈ Ĉ`).** A non-trivial fraction of + * samples clamp to `0`, biasing the corrected mean upward, + * contracting `vr`/`sd`/`sem`/`moe`/`aad`, and compounding the + * `M / (M − Ĉ)` factor in `rme`. Once the cumulative mass of raw + * samples at or below `Ĉ` reaches a given quantile, that percentile + * collapses to `0`; in particular `p50` collapses once at least half + * of the raw samples satisfy `raw_sample ≤ Ĉ`, which then forces + * `mad` and `aad` toward `0`. Prefer `overriddenDuration` for + * sub-overhead measurements. 
+ * + * **Three observable consequences of the clamp.** + * + * 1. `latency.min` may be exactly `0` even when no zero-duration sample + * was actually observed. + * 2. The throughput estimator substitutes `1000 / latency.mean` (or `0` + * when `mean === 0`) for every clamped sample. + * 3. {@link detectTimerSaturation} criterion `'zero-dominated'` cannot + * distinguish clamped samples from genuine zero-duration timer + * reads, so a `'warning'` event may be dispatched in the + * sub-overhead regime even when the timer itself is not saturated. + * + * **Caveat — `concurrency: "task"`.** The overhead is calibrated once + * at construction time with sequential timer calls. Setting both + * options causes the constructor (and `run()`) to throw, since the * sequentially-calibrated estimate would not reflect the per-iteration * timer call cost under concurrent execution. * * **Caveat — `overriddenDuration`.** Samples returned by the task - * function via `overriddenDuration` are intentional user values and are - * never modified by the correction. They are also excluded from - * timer-saturation detection so that a deterministic synthetic - * `overriddenDuration` does not produce a spurious `'warning'` event via - * the low-distinct-count criterion. + * function via `overriddenDuration` are intentional user values and + * are never modified by the correction. They are also excluded from + * {@link Task.detectedResolution} and from timer-saturation detection. * - * On runtimes with a coarse timer (resolution >= 1 ms), the calibration - * returns `0` and this option becomes a no-op. + * On runtimes with a coarse timer (resolution >= 1 ms), the + * calibration returns `0` and this option becomes a no-op. 
* @default false */ subtractTimerOverhead?: boolean From b93d693705895ad1bb06b2bfc02c70302cc65a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:47:51 +0200 Subject: [PATCH 09/11] test: cover alignment, p05 estimator, run() invariant, and saturation classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * New `test/subtract-timer-overhead-alignment.test.ts` — exercises the Phase 1/2 alignment invariant on a heterogeneous run (alternating overridden + measured iterations) using a deterministic timestamp provider. Pins exact multiset counts so an off-by-one in the `isOverridden`/`latencySamples` index alignment fails the test. * `test/calibrate-timer-overhead.test.ts`: - Replace the loose `min ≤ median * 2` assertion with a deterministic estimator-ordering test using a scripted ascending-pair provider. - Add a deterministic `'p05'` test pinning the `max(0, ⌈n·0.05⌉ − 1)` index math at three sample sizes. - Add tests for the `subtractTimerOverhead` + `concurrency: 'task'` constructor assert and the equivalent `run()` runtime check. * `test/detected-resolution.test.ts`: replace the conditional `if (resolution !== undefined)` block with unconditional assertions. * `test/utils-detect-timer-saturation.test.ts`: add `classifyTimerSaturation` parallel coverage for each criterion (returning the precise reason string) plus the n<10 and healthy-spread negative cases. * New `test/warning-event-reason.test.ts` — verifies `BenchEvent.reason` carries the saturation reason for `'warning'` events and is `undefined` for other event types. 
--- test/calibrate-timer-overhead.test.ts | 87 ++++++++++++++++-- test/detected-resolution.test.ts | 9 +- .../subtract-timer-overhead-alignment.test.ts | 92 +++++++++++++++++++ test/utils-detect-timer-saturation.test.ts | 43 ++++++++- test/warning-event-reason.test.ts | 44 +++++++++ 5 files changed, 261 insertions(+), 14 deletions(-) create mode 100644 test/subtract-timer-overhead-alignment.test.ts create mode 100644 test/warning-event-reason.test.ts diff --git a/test/calibrate-timer-overhead.test.ts b/test/calibrate-timer-overhead.test.ts index cfe403c6..a38a9dbd 100644 --- a/test/calibrate-timer-overhead.test.ts +++ b/test/calibrate-timer-overhead.test.ts @@ -7,9 +7,30 @@ import { calibrateTimerOverhead, hrtimeNowTimestampProvider, mToMs, + nToMs, performanceNowTimestampProvider, } from '../src/utils' +/** + * Deterministic provider: pair `i` returns `(0, i + 1)`, so the i-th + * collected delta equals `(i + 1)` ns. With `samples = N` the sorted + * deltas (in ms) are `[1, 2, …, N] × 1e-6`. + * @returns a fresh provider with its own counter + */ +const makeAscendingPairProvider = (): TimestampProvider => { + let callCount = 0 + return { + fn: () => { + const idx = callCount++ + const pairIdx = idx >> 1 + return (idx & 1) === 1 ? 
pairIdx + 1 : 0 + }, + fromMs: ms => ms * 1_000_000, + name: 'asc-pairs', + toMs: nToMs, + } +} + test('calibrateTimerOverhead returns a finite non-negative number with performanceNow', () => { const overhead = calibrateTimerOverhead(performanceNowTimestampProvider) expect(overhead).toBeTypeOf('number') @@ -45,16 +66,49 @@ test('calibrateTimerOverhead returns 0 for a coarse 1 ms timer provider', () => expect(calibrateTimerOverhead(coarseProvider, { samples: 1024 })).toBe(0) }) -test('calibrateTimerOverhead estimator min is less than or equal to median', () => { - const med = calibrateTimerOverhead(hrtimeNowTimestampProvider, { - estimator: 'median', - samples: 512, - }) - const min = calibrateTimerOverhead(hrtimeNowTimestampProvider, { +test('calibrateTimerOverhead estimators are ordered min ≤ p05 ≤ median', () => { + const min = calibrateTimerOverhead(makeAscendingPairProvider(), { estimator: 'min', - samples: 512, + samples: 100, + warmupSamples: 0, + }) + const p05 = calibrateTimerOverhead(makeAscendingPairProvider(), { + estimator: 'p05', + samples: 100, + warmupSamples: 0, }) - expect(min).toBeLessThanOrEqual(med * 2) + const median = calibrateTimerOverhead(makeAscendingPairProvider(), { + estimator: 'median', + samples: 100, + warmupSamples: 0, + }) + expect(min).toBe(1e-6) + expect(p05).toBe(5e-6) + expect(median).toBe(50.5e-6) +}) + +test("calibrateTimerOverhead 'p05' selects the index ⌈n·0.05⌉ − 1 delta", () => { + expect( + calibrateTimerOverhead(makeAscendingPairProvider(), { + estimator: 'p05', + samples: 20, + warmupSamples: 0, + }) + ).toBe(1e-6) + expect( + calibrateTimerOverhead(makeAscendingPairProvider(), { + estimator: 'p05', + samples: 21, + warmupSamples: 0, + }) + ).toBe(2e-6) + expect( + calibrateTimerOverhead(makeAscendingPairProvider(), { + estimator: 'p05', + samples: 200, + warmupSamples: 0, + }) + ).toBe(10e-6) }) test('calibrateTimerOverhead with hrtimeNow returns a plausible overhead under 10 microseconds', () => { @@ -106,3 +160,20 @@ 
test('subtractTimerOverhead: true does not produce negative latency samples', () expect(fooTask.result.latency.min).toBeGreaterThanOrEqual(0) expect(fooTask.result.latency.mean).toBeGreaterThanOrEqual(0) }) + +test('subtractTimerOverhead with concurrency: "task" throws at construction', () => { + expect( + () => new Bench({ concurrency: 'task', subtractTimerOverhead: true }) + ).toThrow(/cannot be used with `concurrency: "task"`/) +}) + +test('subtractTimerOverhead enforces concurrency invariant at run()', async () => { + const bench = new Bench({ subtractTimerOverhead: true }) + bench.add('noop', () => { + // noop + }) + ;(bench as { concurrency: 'bench' | 'task' | null }).concurrency = 'task' + await expect(bench.run()).rejects.toThrow( + /cannot be used with `concurrency: "task"`/ + ) +}) diff --git a/test/detected-resolution.test.ts b/test/detected-resolution.test.ts index 690d938c..39c16847 100644 --- a/test/detected-resolution.test.ts +++ b/test/detected-resolution.test.ts @@ -52,11 +52,10 @@ test('Task.detectedResolution is populated after a successful run', () => { if (fooTask.result.state !== 'completed') return const resolution = fooTask.detectedResolution - if (resolution !== undefined) { - expect(resolution).toBeTypeOf('number') - expect(resolution).toBeGreaterThan(0) - expect(Number.isFinite(resolution)).toBe(true) - } + expect(resolution).toBeDefined() + expect(resolution).toBeTypeOf('number') + expect(resolution).toBeGreaterThan(0) + expect(Number.isFinite(resolution)).toBe(true) }) test('Task.detectedResolution is reset to undefined by reset()', () => { diff --git a/test/subtract-timer-overhead-alignment.test.ts b/test/subtract-timer-overhead-alignment.test.ts new file mode 100644 index 00000000..336f9cf4 --- /dev/null +++ b/test/subtract-timer-overhead-alignment.test.ts @@ -0,0 +1,92 @@ +import { expect, test } from 'vitest' + +import type { TimestampProvider } from '../src/types' + +import { Bench } from '../src' +import { nToMs } from 
'../src/utils' + +/** + * Deterministic provider where every back-to-back call pair yields a delta + * of exactly `stepNs` nanoseconds. Calibration converges on `stepNs / 1e6` + * ms; every measured raw `taskTime` equals that same overhead, so after + * `max(0, raw - Ĉ)` correction every measured sample becomes `0`. + * @param stepNs - the per-call increment in nanoseconds + * @returns a fresh provider with its own counter + */ +const makeStepProvider = (stepNs: number): TimestampProvider => { + let counter = 0 + return { + fn: () => { + counter += 1 + return counter * stepNs + }, + fromMs: ms => ms * 1_000_000, + name: 'det-step', + toMs: nToMs, + } +} + +test('subtractTimerOverhead aligns isOverridden with samples in mixed runs', async () => { + const iterations = 32 + const K = 100 + const stepNs = 1000 + const stepMs = stepNs / 1_000_000 + const bench = new Bench({ + iterations, + retainSamples: true, + subtractTimerOverhead: true, + time: 0, + timestampProvider: makeStepProvider(stepNs), + warmup: false, + }) + expect(bench.timerOverhead).toBe(stepMs) + + let i = 0 + bench.add('alternating', () => { + if ((i++ & 1) === 0) return { overriddenDuration: K } + return undefined + }) + await bench.run() + + const task = bench.getTask('alternating') + expect(task).toBeDefined() + if (!task) return + expect(task.result.state).toBe('completed') + if (task.result.state !== 'completed') return + + const samples = task.result.latency.samples + expect(samples).toBeDefined() + if (!samples) return + expect(samples.length).toBe(iterations) + expect(samples.filter(s => s === K).length).toBe(iterations / 2) + expect(samples.filter(s => s === 0).length).toBe(iterations / 2) + expect(samples.every(s => s >= 0)).toBe(true) + expect(task.result.latency.min).toBe(0) + expect(task.result.latency.max).toBe(K) +}) + +test('subtractTimerOverhead alignment holds with the real timer', async () => { + const iterations = 32 + const K = 0.0000001234567 + const bench = new Bench({ + 
iterations, + retainSamples: true, + subtractTimerOverhead: true, + time: 0, + warmup: false, + }) + let i = 0 + bench.add('alternating', () => { + if ((i++ & 1) === 0) return { overriddenDuration: K } + return undefined + }) + await bench.run() + + const task = bench.getTask('alternating') + if (task?.result.state !== 'completed') return + const samples = task.result.latency.samples + if (!samples) return + expect(samples.filter(s => s === K).length).toBe(iterations / 2) + expect(samples.filter(s => s !== K).every(s => s >= 0)).toBe(true) + expect(samples.filter(s => s !== K).length).toBe(iterations / 2) +}) diff --git a/test/utils-detect-timer-saturation.test.ts b/test/utils-detect-timer-saturation.test.ts index b3095e3f..244990c5 100644 --- a/test/utils-detect-timer-saturation.test.ts +++ b/test/utils-detect-timer-saturation.test.ts @@ -2,7 +2,10 @@ import { expect, test } from 'vitest' import type { SortedSamples } from '../src/types' -import { detectTimerSaturation } from '../src/utils' +import { + classifyTimerSaturation, + detectTimerSaturation, +} from '../src/utils' const asSorted = (arr: number[]): SortedSamples => arr as unknown as SortedSamples @@ -54,3 +57,41 @@ test('detectTimerSaturation does not flag healthy spread samples', () => { arr.sort((a, b) => a - b) expect(detectTimerSaturation(asSorted(arr), 1.5)).toBe(false) }) + +test('classifyTimerSaturation returns undefined for n below the minimum threshold', () => { + expect(classifyTimerSaturation(asSorted([1]), 0)).toBeUndefined() + expect( + classifyTimerSaturation(asSorted([1, 1, 1, 1, 1, 1, 1, 1, 1]), 0) + ).toBeUndefined() +}) + +test("classifyTimerSaturation returns 'zero-dominated' for criterion A", () => { + expect( + classifyTimerSaturation(asSorted([0, 0, 0, 0, 0, 0, 1, 2, 3, 4]), 0) + ).toBe('zero-dominated') +}) + +test("classifyTimerSaturation returns 'low-distinct' for criterion B", () => { + expect( + classifyTimerSaturation(asSorted(new Array(64).fill(1)), 0) + ).toBe('low-distinct') 
+}) + +test("classifyTimerSaturation returns 'zero-mad' for criterion C", () => { + const arr: number[] = [] + for (let i = 0; i < 120; i++) arr.push(5) + for (let i = 0; i < 80; i++) arr.push((i % 10) + 1) + arr.sort((a, b) => a - b) + expect(classifyTimerSaturation(asSorted(arr), 0)).toBe('zero-mad') +}) + +test('classifyTimerSaturation returns undefined for healthy spread samples', () => { + const arr: number[] = [] + let seed = 42 + for (let i = 0; i < 500; i++) { + seed = (seed * 1664525 + 1013904223) >>> 0 + arr.push(50 + ((seed >>> 0) / 0xffffffff - 0.5) * 10) + } + arr.sort((a, b) => a - b) + expect(classifyTimerSaturation(asSorted(arr), 1.5)).toBeUndefined() +}) diff --git a/test/warning-event-reason.test.ts b/test/warning-event-reason.test.ts new file mode 100644 index 00000000..e6167e41 --- /dev/null +++ b/test/warning-event-reason.test.ts @@ -0,0 +1,44 @@ +import { expect, test } from 'vitest' + +import type { TimerSaturationReason, TimestampProvider } from '../src/types' + +import { Bench } from '../src' +import { mToMs } from '../src/utils' + +const fixedZeroProvider: TimestampProvider = { + fn: () => 0, + fromMs: mToMs, + name: 'fixed-zero', + toMs: mToMs, +} + +test("warning event carries reason 'zero-dominated' for a constant-zero timer", async () => { + const bench = new Bench({ + iterations: 64, + time: 0, + timestampProvider: fixedZeroProvider, + warmup: false, + }) + let received: TimerSaturationReason | undefined + bench.addEventListener('warning', evt => { + received = evt.reason + }) + bench.add('zero-task', () => { + // noop + }) + await bench.run() + expect(received).toBe('zero-dominated') +}) + +test('non-warning events expose reason as undefined', async () => { + const bench = new Bench({ iterations: 8, time: 0, warmup: false }) + let cycleReason: unknown = 'untouched' + bench.addEventListener('cycle', evt => { + cycleReason = evt.reason + }) + bench.add('noop', () => { + // noop + }) + await bench.run() + 
expect(cycleReason).toBeUndefined() +}) From 556d884c04e8be4a07c83e09561bfd88822c375c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 15:48:57 +0200 Subject: [PATCH 10/11] docs(readme): document timer overhead correction, per-sample override, and timer diagnostics * New 'Timer Overhead Correction' section covers `subtractTimerOverhead`, the calibration helper, and the `concurrency: 'task'` and sub-overhead caveats. * New 'Per-Sample Override' section documents `overriddenDuration` (previously absent from the README despite being supported in code). * New 'Timer Diagnostics' section covers `Task.detectedResolution` and the `'warning'` event with its `TimerSaturationReason` payload, plus pointers to the standalone helpers. * Extend the `BenchEvents` listener example with a `'warning'` listener that reads `evt.reason`. --- README.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/README.md b/README.md index eebe95c7..863d168e 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,12 @@ Both the `Task` and `Bench` classes extend the `EventTarget` object. So you can bench.addEventListener('cycle', (evt) => { const task = evt.task!; }); + +// runs when timer saturation is detected for a task's measured samples +bench.addEventListener('warning', (evt) => { + const task = evt.task!; + const reason = evt.reason; // 'zero-dominated' | 'low-distinct' | 'zero-mad' +}); ``` #### [`TaskEvents`](https://tinylibs.github.io/tinybench/types/TaskEvents.html) @@ -286,6 +292,96 @@ const bench = new Bench({ }) ``` +## Timer Overhead Correction + +Each timer call (`performance.now()`, `process.hrtime.bigint()`, …) has a +non-zero call cost `C`. For a task whose true duration `X` is comparable +to `C`, the raw measured sample `X + C` is dominated by the timer rather +than the task. 
+ +When `subtractTimerOverhead: true` is set, an estimate `Ĉ` is computed +once at construction time via [`calibrateTimerOverhead`](https://tinylibs.github.io/tinybench/functions/calibrateTimerOverhead.html), +and `Math.max(0, raw_sample - Ĉ)` is used as each non-overridden sample +before statistics are computed. + +```ts +const bench = new Bench({ subtractTimerOverhead: true }) +console.log(bench.timerOverhead) // calibrated Ĉ in ms (or undefined) +``` + +The calibration helper is also exported for direct use, with a +configurable estimator strategy (`'median'` default, or `'min'` / `'p05'`): + +```ts +import { calibrateTimerOverhead, hrtimeNowTimestampProvider } from 'tinybench' + +const overhead = calibrateTimerOverhead(hrtimeNowTimestampProvider, { + estimator: 'p05', + samples: 1024, + warmupSamples: 64, +}) +``` + +**Caveats.** + +- Incompatible with `concurrency: 'task'` — overhead is calibrated + sequentially and does not reflect concurrent execution cost. + Construction (and `run()`) throws if both are set. +- For sub-overhead measurements (`X ≈ Ĉ`) the `max(0, …)` clamp + truncates the lower tail and biases statistics; prefer + `overriddenDuration` (see below). +- On runtimes with a coarse timer (resolution >= 1 ms) the calibration + returns `0` and the option becomes a no-op. + +## Per-Sample Override (`overriddenDuration`) + +A task function may return an object containing `overriddenDuration` +(in ms). That value replaces the timer-measured sample directly, +bypassing both the timer and any overhead correction. Useful for +externally-timed work or sub-overhead measurements that the timer +cannot resolve. + +```ts +bench.add('externally-timed', () => { + const start = process.hrtime.bigint() + doWork() + const elapsedMs = Number(process.hrtime.bigint() - start) / 1e6 + return { overriddenDuration: elapsedMs } +}) +``` + +Overridden samples are excluded from `Task.detectedResolution` and +from timer-saturation detection. 
+ +## Timer Diagnostics + +After `bench.run()` (or `runSync()`), each task exposes +`detectedResolution` — the smallest reproducibly observed positive +sample (in ms) among the timer-measured samples, or `undefined` when no +positive timer measurement was seen (e.g. every sample was overridden). + +```ts +const task = bench.getTask('foo') +console.log(task?.detectedResolution) // e.g. 0.000041 (≈ 41 ns) +``` + +When the timer's resolution dominates a task's measured distribution +(more than half zero samples, fewer than `max(3, min(10, ⌊n / 1000⌋))` +distinct values, or zero MAD with `n > 100`), tinybench dispatches a +`'warning'` event on both the task and the bench, carrying the matching +[`TimerSaturationReason`](https://tinylibs.github.io/tinybench/types/TimerSaturationReason.html): + +```ts +bench.addEventListener('warning', evt => { + console.warn(`timer-saturated: ${evt.task?.name} — ${evt.reason}`) +}) +``` + +The same heuristic and estimator are exposed as standalone helpers for +custom analysis: [`detectTimerSaturation`](https://tinylibs.github.io/tinybench/functions/detectTimerSaturation.html), +[`classifyTimerSaturation`](https://tinylibs.github.io/tinybench/functions/classifyTimerSaturation.html), +and [`estimateResolution`](https://tinylibs.github.io/tinybench/functions/estimateResolution.html). + ## Aborting Benchmarks Tinybench supports aborting benchmarks using `AbortSignal` at both the bench and task levels: From eac7e03eec956b2de5fcff6a030839ea7785683f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Benoit?= Date: Sat, 30 May 2026 16:18:04 +0200 Subject: [PATCH 11/11] fix(utils): use backticked refs for non-exported symbols in JSDoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `computeStatistics` and `absoluteDeviationMedian` are not re-exported from the package entry point, so `{@link …}` references to them trigger `typedoc --treatWarningsAsErrors`. 
Switch them to plain backticked code references; `{@link}` is preserved only for symbols listed in the public exports. --- src/utils.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils.ts b/src/utils.ts index b1f4a7cc..2f853c01 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -275,7 +275,7 @@ export const isValidSamples = ( * the threshold is reached. * @param samples - the latency samples, sorted ascending * @param mad - the median absolute deviation (e.g. from - * {@link medianAbsoluteDeviation} or {@link computeStatistics}) + * {@link medianAbsoluteDeviation} or `computeStatistics`) * @returns the saturation reason, or `undefined` when no criterion fires */ export const classifyTimerSaturation = ( @@ -617,8 +617,8 @@ export function absoluteDeviationMedian ( * Computes the median absolute deviation (MAD) of a sorted sample set. * * Convenience wrapper that derives the median from the sorted input and - * forwards to {@link absoluteDeviationMedian}. Use when only `mad` is - * required and the cost of a full {@link computeStatistics} pass is + * forwards to `absoluteDeviationMedian`. Use when only `mad` is + * required and the cost of a full `computeStatistics` pass is * unjustified (e.g. inside {@link classifyTimerSaturation}). * @param samples - the sorted sample, length ≥ 1 * @returns the median absolute deviation