Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d6c7dac
Try docker improvements
alan-george-lk Jun 4, 2026
1007f2b
Try better docker behavior
alan-george-lk Jun 4, 2026
2ed1981
Try a docker-free run
alan-george-lk Jun 4, 2026
f0124cd
Add nightly build, change how docker pushes
alan-george-lk Jun 9, 2026
19f98c2
Run stress tests in nightly
alan-george-lk Jun 9, 2026
fa14a98
Run nightly nowly
alan-george-lk Jun 9, 2026
8b49e9d
Hopefully clean up docker
alan-george-lk Jun 10, 2026
8a67489
Try cleaner shutdown for test instability
alan-george-lk Jun 10, 2026
5889f75
Try better bracktracing on nightly, reduce iteration count
alan-george-lk Jun 15, 2026
5ad3be4
Improve macOS PlatformAudio CI diagnostics and isolate flaky tests.
alan-george-lk Jun 16, 2026
b5ce47e
Fix macOS CI by not running the full test suite under lldb.
alan-george-lk Jun 16, 2026
2b0513a
Use single-run verbose unit tests on nightly PR invocations.
alan-george-lk Jun 16, 2026
274092e
Maybe sanitizer
alan-george-lk Jun 16, 2026
bb0386b
Maybe an integration test fix
alan-george-lk Jun 16, 2026
5e13196
Another fix try
alan-george-lk Jun 16, 2026
7d9c735
Try simpler cleanup
alan-george-lk Jun 16, 2026
dd41776
Move rpc test to stress
alan-george-lk Jun 16, 2026
9460ede
Try debug log
alan-george-lk Jun 16, 2026
0e41652
Debug logs on nightly tests as well
alan-george-lk Jun 16, 2026
ea27652
Increase PlatformAudio nightly diagnostics
alan-george-lk Jun 17, 2026
35d954a
Attempt to catch linux hang
alan-george-lk Jun 21, 2026
e3891a3
Bring in platform-audio stuff and rebase off main
alan-george-lk Jun 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 220 additions & 0 deletions .github/scripts/run_tests_with_backtrace.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
#!/usr/bin/env bash
# Run a test binary under debug CI. On fatal signals, print post-mortem
# backtraces from core dumps when available. On Linux the binary is run under
# catchsegv when that tool is installed, so a partial backtrace appears in the
# log even without a core file.
#
# Usage: run_tests_with_backtrace.sh <test-binary> [gtest-args...]
#
# Environment:
#   RUNNER_TEMP                 Base directory for the core directory
#                               (livekit-test-cores) and the captured test
#                               log; falls back to /tmp when unset.
#   LIVEKIT_TEST_STALL_SECONDS  When set to a positive integer, a watchdog
#                               monitors test output and dumps live thread
#                               backtraces if the log goes silent for that
#                               many seconds (integration-test hang
#                               diagnostics on linux-x64). In this mode the
#                               test output is captured to a log file and
#                               printed after the test finishes.
#
# Exit status: the test binary's exit status, or 2 when the arguments are
# missing or the binary is not executable.
#
# Note: errexit (-e) is not enabled at startup; only nounset and pipefail are.
set -uo pipefail

# Print the command-line synopsis to stderr and terminate with status 2.
usage() {
  printf 'Usage: %s <test-binary> [gtest-args...]\n' "$0" >&2
  exit 2
}

# At least the test binary must be given on the command line.
if (($# < 1)); then
  usage
fi

# First argument is the binary; everything after it is forwarded to the test.
binary=$1
shift

if ! [[ -x "$binary" ]]; then
  echo "Error: not executable: $binary" >&2
  exit 2
fi

# Absolute path of the binary, used later by gdb/lldb and crash-report lookup.
binary_abs=$(cd "$(dirname "$binary")" && pwd)/$(basename "$binary")

# Directory that collects core dumps for this job.
core_dir="${RUNNER_TEMP:-/tmp}/livekit-test-cores"
mkdir -p "$core_dir"

# Best effort: allow core files of any size.
ulimit -c unlimited || true

# Per-platform core-dump configuration; every step is best effort.
case "$(uname -s)" in
  Linux)
    # Have the kernel write cores as core.<executable>.<pid> into core_dir.
    printf '%s\n' "${core_dir}/core.%e.%p" \
      | sudo tee /proc/sys/kernel/core_pattern >/dev/null || true
    ;;
  Darwin)
    ulimit -c unlimited || true
    sudo sysctl -w kern.coredump=1 >/dev/null 2>&1 || true
    # Make sure /cores exists and is world-writable with the sticky bit.
    sudo mkdir -p /cores 2>/dev/null || true
    sudo chmod 1777 /cores 2>/dev/null || true
    ;;
esac

# Print macOS DiagnosticReports (.ips) whose file name starts with the test
# binary's name.
# Globals:   binary_abs (read), HOME (read)
# Outputs:   for each report directory, up to three reports (reverse name
#            order), each truncated to its first 200 lines; a notice when no
#            report is found at all.
dump_macos_crash_reports() {
  local binary_name
  binary_name=$(basename "${binary_abs}")
  local found=0

  echo "=== macOS DiagnosticReports for ${binary_name} ==="

  for report_dir in \
    "${HOME}/Library/Logs/DiagnosticReports" \
    "/Library/Logs/DiagnosticReports"; do
    # Skip report locations that do not exist on this machine.
    [[ -d "${report_dir}" ]] || continue

    while IFS= read -r report; do
      found=1
      echo "Crash report: ${report}"
      # .ips files are JSON-ish; print the first 200 lines for the CI log.
      head -n 200 "${report}" || true
    done < <(
      find "${report_dir}" -maxdepth 1 -name "${binary_name}*.ips" -type f -print 2>/dev/null \
        | sort -r \
        | head -n 3
    )
  done

  ((found != 0)) || echo "No DiagnosticReports .ips found for ${binary_name}"
}

# Print backtraces of every thread of a still-running process.
# Arguments: $1 - pid of the process to inspect
#            $2 - free-form reason, shown in the banner line
# Outputs:   banner plus debugger output (gdb on Linux; sample and lldb on
#            macOS); a notice when the debugger is not installed.
# Returns:   0; debugger failures are ignored.
dump_live_backtraces() {
  local test_pid=$1
  local reason=$2

  echo "=== live backtrace diagnostics (${reason}, pid ${test_pid}) ==="

  case "$(uname -s)" in
    Linux)
      if ! command -v gdb >/dev/null 2>&1; then
        echo "gdb not available; install gdb for live backtraces"
      else
        gdb -batch \
          -ex 'set pagination off' \
          -ex 'thread apply all bt full' \
          -p "${test_pid}" || true
      fi
      return 0
      ;;
    Darwin)
      # sample is optional; it is skipped silently when missing.
      if command -v sample >/dev/null 2>&1; then
        sample "${test_pid}" 5 -mayDie 2>&1 || true
      fi
      if ! command -v lldb >/dev/null 2>&1; then
        echo "lldb not available"
      else
        lldb -p "${test_pid}" --batch -o 'thread backtrace all' -o 'detach' -o 'quit' 2>&1 || true
      fi
      ;;
  esac
}

# Print the path of one core file in directory $1 whose name matches glob $2
# (the lexicographically last match), or nothing when there is none.
# Never fails, so it is safe under errexit/pipefail even if $1 is missing.
_pick_core() {
  find "$1" -maxdepth 1 -name "$2" -type f 2>/dev/null | sort -r | head -1 || true
}

# Print post-mortem backtraces for a test process that died from a signal.
# Globals:   core_dir (read), binary_abs (read)
# Arguments: $1 - pid of the test process
#            $2 - exit status reported for it (shown in the banner)
# Outputs:   banner, the chosen core file and the debugger's backtrace of all
#            threads; on macOS also the matching DiagnosticReports.
# Side effects: copies the core into core_dir (best effort) and records its
#            file name in ${core_dir}/last-core.name.
# Returns:   0; every diagnostic step is best effort.
dump_backtraces() {
  local test_pid=$1
  local status=$2
  local core=""
  local candidate

  echo "=== crash diagnostics (exit status ${status}, pid ${test_pid}) ==="

  case "$(uname -s)" in
    Linux)
      # Cores are named core.<executable>.<pid>. Prefer the one belonging to
      # this pid so a stale core left by an earlier test binary is not
      # reported against this run; only then fall back to any core at all.
      core=$(_pick_core "$core_dir" "core.*.${test_pid}")
      if [[ -z "$core" ]]; then
        core=$(_pick_core "$core_dir" 'core.*')
      fi
      if [[ -z "$core" ]]; then
        core=$(_pick_core /tmp 'core.*')
      fi
      if [[ -n "$core" && -f "$core" ]]; then
        echo "Core file: ${core}"
        if command -v gdb >/dev/null 2>&1; then
          gdb -batch \
            -ex 'set pagination off' \
            -ex 'thread apply all bt full' \
            "${binary_abs}" "${core}" || true
        else
          echo "gdb not available; install gdb for post-mortem backtraces"
        fi
        # Fails harmlessly when the core already lives in core_dir.
        cp -a "${core}" "${core_dir}/" 2>/dev/null || true
        basename "${core}" >"${core_dir}/last-core.name"
      else
        echo "No core file found under ${core_dir} or /tmp"
      fi
      return 0
      ;;
    Darwin)
      # Prefer the core written for this pid, then any core under /cores.
      for candidate in "/cores/core.${test_pid}" "/cores/core.${test_pid}.dump"; do
        if [[ -f "${candidate}" ]]; then
          core=${candidate}
          break
        fi
      done
      if [[ -z "$core" ]]; then
        core=$(_pick_core /cores 'core.*')
      fi
      if [[ -n "$core" && -f "$core" ]]; then
        echo "Core file: ${core}"
        if command -v lldb >/dev/null 2>&1; then
          lldb -b -c "${core}" -o 'thread backtrace all' -o 'quit' -- "${binary_abs}" || true
        else
          echo "lldb not available"
        fi
        cp -a "${core}" "${core_dir}/" 2>/dev/null || true
        basename "${core}" >"${core_dir}/last-core.name"
      else
        echo "No core file found under /cores for pid ${test_pid}"
      fi
      dump_macos_crash_reports
      ;;
  esac
}

# Run the test binary with the given arguments, under catchsegv on Linux when
# that tool is installed.
#
# The binary is exec'd instead of being spawned as a child. This function is
# launched as a background job, and without exec the PID seen by the caller
# ($!) would belong to a wrapper bash subshell: live gdb/lldb backtraces
# would show bash, the watchdog's kill would hit the wrapper and leave the
# real test running, and the pid-specific core lookup would miss.
#
# Because exec replaces the current shell process, call this only from a
# background job or a subshell (as the script does), never in the foreground
# of the main script. With catchsegv the PID is that of the catchsegv wrapper.
#
# Globals:   binary_abs (read)
# Arguments: forwarded verbatim to the test binary
# Returns:   does not return; the process exits with the test's status.
run_test() {
  if [[ "$(uname -s)" == "Linux" ]] && command -v catchsegv >/dev/null 2>&1; then
    exec catchsegv "${binary_abs}" "$@"
  fi
  exec "${binary_abs}" "$@"
}

# Start a background watchdog that aborts the test when its log stops growing.
#
# Every few seconds the watchdog compares the size of the log file with the
# previous sample. Once the size has been unchanged for at least the stall
# limit, it prints the tail of the log, dumps live backtraces, sends SIGABRT
# (falling back to SIGTERM) to the test and stops. It also stops on its own
# when the test process is gone.
#
# The watchdog's own output goes to stderr, never to stdout. Callers capture
# this function's stdout with $(...) to get the watchdog PID; if the
# background loop kept that pipe open as its stdout, the command substitution
# would block until the loop ended and the hang diagnostics would be swallowed
# into the captured "PID" instead of reaching the CI log.
#
# Arguments: $1 - pid of the test process to monitor
#            $2 - log file receiving the test's output
#            $3 - seconds without new output before the test counts as hung
# Outputs:   stdout: pid of the watchdog process (one line)
#            stderr: hang diagnostics, if a stall is detected
start_stall_watchdog() {
  local test_pid=$1
  local log_file=$2
  local stall_limit=$3
  local -r poll_seconds=5

  (
    local last_size=-1
    local stall=0
    local size
    while kill -0 "${test_pid}" 2>/dev/null; do
      # Braces make 2>/dev/null cover the input redirection as well.
      size=$({ wc -c <"${log_file}"; } 2>/dev/null || echo 0)
      if [[ "${size}" == "${last_size}" ]]; then
        stall=$((stall + poll_seconds))
      else
        stall=0
        last_size=${size}
      fi
      if ((stall >= stall_limit)); then
        echo "=== TEST HANG DETECTED: no output for ${stall}s (pid ${test_pid}) ==="
        echo "--- last log lines ---"
        tail -n 40 "${log_file}" || true
        dump_live_backtraces "${test_pid}" "stall ${stall}s"
        # SIGABRT first so the test can leave a core dump for post-mortem.
        kill -ABRT "${test_pid}" 2>/dev/null || kill -TERM "${test_pid}" 2>/dev/null || true
        break
      fi
      sleep "${poll_seconds}"
    done
  ) >&2 &
  echo $!
}

# ---------------------------------------------------------------------------
# Main: run the test (optionally under the stall watchdog), report, and exit
# with the test's own status.
# ---------------------------------------------------------------------------

# Stall limit in seconds; 0 disables the watchdog. Anything that is not a
# plain non-negative integer is rejected up front: inside (( )) such a value
# would be evaluated as an expression or variable name and could abort the
# script under `set -u`.
stall_limit=${LIVEKIT_TEST_STALL_SECONDS:-0}
if [[ ! "${stall_limit}" =~ ^[0-9]+$ ]]; then
  echo "Warning: ignoring non-integer LIVEKIT_TEST_STALL_SECONDS='${stall_limit}'" >&2
  stall_limit=0
fi
# Force base 10 so values with leading zeros are not parsed as octal.
stall_limit=$((10#${stall_limit}))
log_file="${RUNNER_TEMP:-/tmp}/livekit-test-output.log"

# errexit is intentionally not enabled anywhere below: the test is expected to
# fail sometimes, and the diagnostics that follow are best effort. A failing
# diagnostic command must never replace the test's exit status.
if ((stall_limit > 0)); then
  # Capture output in a file so the watchdog can watch it grow; it is printed
  # once the test has finished.
  : >"${log_file}"
  run_test "$@" >"${log_file}" 2>&1 &
  test_pid=$!
  watchdog_pid=$(start_stall_watchdog "${test_pid}" "${log_file}" "${stall_limit}")
  wait "${test_pid}"
  status=$?
  kill "${watchdog_pid}" 2>/dev/null || true
  wait "${watchdog_pid}" 2>/dev/null || true
  cat "${log_file}"
else
  # Run in the background so the test's pid is known for crash diagnostics.
  run_test "$@" &
  test_pid=$!
  wait "${test_pid}"
  status=$?
fi

# A status above 128 means the process was terminated by signal (status - 128).
if ((status > 128)); then
  signal=$((status - 128))
  echo "Test process ${test_pid} terminated by signal ${signal}"
  dump_backtraces "${test_pid}" "${status}"
elif ((status != 0)); then
  echo "Test process exited with status ${status}"
fi

exit "${status}"
47 changes: 47 additions & 0 deletions .github/scripts/stage_crash_diagnostics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Collect unstripped test binaries, shared libraries, and core dumps for upload.
#
# Usage: stage_crash_diagnostics.sh <build-dir>
#
# Everything is copied into <temp>/crash-diagnostics, where <temp> is
# $RUNNER_TEMP or /tmp when that is unset or empty. This is the same fallback
# run_tests_with_backtrace.sh uses for its core directory, so cores written by
# a run outside GitHub Actions are still found. The FFI library is looked up
# under client-sdk-rust/target/debug relative to the current directory.
set -euo pipefail

build_dir=${1:?usage: stage_crash_diagnostics.sh <build-dir>}
temp_root=${RUNNER_TEMP:-/tmp}
staging="${temp_root}/crash-diagnostics"
os_name=$(uname -s)

# Start from an empty staging tree; :? refuses to run rm on an empty path.
rm -rf -- "${staging:?}"
mkdir -p "${staging}/bin" "${staging}/lib" "${staging}/cores"

# Without nullglob an unmatched pattern would be iterated as a literal string.
shopt -s nullglob
for bin in "${build_dir}"/bin/livekit_*; do
  if [[ -f "${bin}" && -x "${bin}" ]]; then
    cp -a "${bin}" "${staging}/bin/"
  fi
done

for lib in "${build_dir}"/lib/liblivekit.*; do
  if [[ -f "${lib}" ]]; then
    cp -a "${lib}" "${staging}/lib/"
  fi
done

# NUL-delimited so paths containing whitespace survive; a missing target
# directory simply yields no libraries.
while IFS= read -r -d '' ffi_lib; do
  cp -a "${ffi_lib}" "${staging}/lib/"
done < <(find client-sdk-rust/target/debug -name 'liblivekit_ffi.*' -print0 2>/dev/null)

# Core dumps gathered by the test runner (best effort).
core_dir="${temp_root}/livekit-test-cores"
if [[ -d "${core_dir}" ]]; then
  find "${core_dir}" -maxdepth 1 -name 'core.*' -type f -exec cp -a {} "${staging}/cores/" \; 2>/dev/null || true
fi

if [[ "${os_name}" == "Darwin" ]]; then
  # macOS core location plus the system's .ips crash reports (best effort).
  if [[ -d /cores ]]; then
    find /cores -maxdepth 1 -name 'core.*' -type f -exec cp -a {} "${staging}/cores/" \; 2>/dev/null || true
  fi

  mkdir -p "${staging}/crash-reports"
  for report_dir in "${HOME}/Library/Logs/DiagnosticReports" "/Library/Logs/DiagnosticReports"; do
    if [[ -d "${report_dir}" ]]; then
      find "${report_dir}" -maxdepth 1 -name '*.ips' -type f -exec cp -a {} "${staging}/crash-reports/" \; 2>/dev/null || true
    fi
  done
fi

echo "Staged crash diagnostics under ${staging}:"
find "${staging}" -type f -print
Loading
Loading