diff --git a/.github/workflows/comment.yml b/.github/workflows/comment.yml index 5ef2da52..c06d9726 100644 --- a/.github/workflows/comment.yml +++ b/.github/workflows/comment.yml @@ -9,7 +9,7 @@ permissions: {} jobs: upload-pr-comment: if: ${{ github.event.workflow_run.event == 'pull_request' }} - + name: Upload PR comment runs-on: ubuntu-latest permissions: @@ -17,33 +17,40 @@ jobs: pull-requests: write steps: - - name: List Annotations + - name: Download comparison artifacts uses: actions/github-script@v9 with: script: | + let fs = require('fs'); let artifacts = await github.rest.actions.listWorkflowRunArtifacts({ owner: context.repo.owner, repo: context.repo.repo, run_id: ${{ github.event.workflow_run.id }}, }); - // List all artifacts - let matchArtifact = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "comment" - })[0]; - - // Download the artifact to github.workspace - let download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - - let fs = require('fs'); - fs.writeFileSync('${{ github.workspace }}/comment.zip', Buffer.from(download.data)); + for (let wanted of ["comment-gnu", "comment-bfs"]) { + let match = artifacts.data.artifacts.find((a) => a.name === wanted); + if (!match) { + console.log(`Artifact ${wanted} not found`); + continue; + } + let download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: match.id, + archive_format: 'zip', + }); + fs.writeFileSync(`${{ github.workspace }}/${wanted}.zip`, Buffer.from(download.data)); + } - - run: unzip comment.zip + - name: Extract artifacts + run: | + for a in comment-gnu comment-bfs; do + if test -f "$a.zip"; then + mkdir -p "$a" + unzip -o "$a.zip" -d "$a" || echo "Failed to unzip $a.zip" + fi + done - name: Comment on PR uses: actions/github-script@v9 @@ -51,26 +58,36 @@ jobs: github-token: ${{ 
secrets.GITHUB_TOKEN }} script: | let fs = require('fs'); - let annotations = JSON.parse(fs.readFileSync('./annotations.json', 'utf8')); - let annotationContent = annotations - .data - .map(annotation => `${annotation.run}: ${annotation.annotation.message}`) - .join('\n'); + function read(path) { + try { return fs.readFileSync(path, 'utf8'); } catch (e) { return ''; } + } + + // The PR number is written to NR by both jobs. + let nr = read('comment-gnu/NR').trim() || read('comment-bfs/NR').trim(); + if (!nr) { + console.log('No PR number found; skipping comment'); + return; + } + + let gnu = read('comment-gnu/result-gnu.txt').trim(); + let bfs = read('comment-bfs/result-bfs.txt').trim(); - // check if no changes - let gnuTestReport = annotationContent.includes('Run GNU findutils tests: Gnu tests No changes'); - let bfsTestReport = annotationContent.includes('Run BFS tests: BFS tests No changes'); + let sections = []; + if (gnu) sections.push('GNU findutils testsuite:\n```\n' + gnu + '\n```'); + if (bfs) sections.push('bfs testsuite:\n```\n' + bfs + '\n```'); - if (gnuTestReport && bfsTestReport) { - console.log('No changes'); + if (sections.length === 0) { + console.log('No test result changes; skipping comment'); return; } - // Comment on the PR - github.rest.issues.createComment({ + let body = 'Commit ${{ github.event.workflow_run.head_sha }} has test result changes:\n\n' + + sections.join('\n\n'); + + await github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: annotations.pull_request_number, - body: 'Commit ${{ github.event.workflow_run.head_sha }} has GNU testsuite comparison:\n```\n' + annotationContent + '\n```\n' - }); \ No newline at end of file + issue_number: Number(nr), + body: body, + }); diff --git a/.github/workflows/compat.yml b/.github/workflows/compat.yml index c6b0e3e5..98485c85 100644 --- a/.github/workflows/compat.yml +++ b/.github/workflows/compat.yml @@ -3,6 +3,8 @@ on: [push, pull_request] 
name: External-testsuites env: CARGO_INCREMENTAL: "0" + # Default branch, used to fetch the reference (baseline) test results. + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} jobs: gnu-tests: permissions: @@ -58,11 +60,9 @@ jobs: findutils-x86_64-unknown-linux-gnu.tar.zst env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Extract testing info - shell: bash - run: | - name: Upload gnu-test-report + if: success() || failure() uses: actions/upload-artifact@v7 with: name: gnu-test-report @@ -70,62 +70,60 @@ jobs: findutils.gnu/find/testsuite/*.log findutils.gnu/xargs/testsuite/*.log findutils.gnu/tests/**/*.log - - name: Upload gnu-result + # The per-test JSON summary doubles as the baseline: future runs on the + # default branch download this artifact to diff against. + - name: Upload gnu-full-result + if: success() || failure() uses: actions/upload-artifact@v7 with: - name: gnu-result - path: gnu-result.json - - name: Download artifacts (gnu-result and gnu-test-report) - uses: actions/github-script@v9 - with: - script: | - let fs = require('fs'); - fs.mkdirSync('${{ github.workspace }}/dl', { recursive: true }); - - async function downloadArtifact(artifactName) { - // List all artifacts from the workflow run - let artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{ github.run_id }}, - }); - - // Find the specified artifact - let matchArtifact = artifacts.data.artifacts.find((artifact) => artifact.name === artifactName); - if (!matchArtifact) { - throw new Error(`Artifact "${artifactName}" not found.`); - } + name: gnu-full-result + path: findutils-gnu-full-result.json + if-no-files-found: warn - // Download the artifact - let download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - - // Save the artifact to a file - fs.writeFileSync(`${{ 
github.workspace }}/dl/${artifactName}.zip`, Buffer.from(download.data)); - } - - // Download the required artifacts - await downloadArtifact("gnu-result"); - await downloadArtifact("gnu-test-report"); - - - name: Compare failing tests against master - shell: bash - run: | - ./findutils/util/diff-gnu.sh ./dl ./findutils.gnu - - name: Compare against main results + - name: Retrieve reference results + uses: dawidd6/action-download-artifact@v21 + continue-on-error: true + with: + workflow: compat.yml + branch: ${{ env.DEFAULT_BRANCH }} + workflow_conclusion: completed + name: gnu-full-result + path: reference-gnu + if_no_artifact_found: warn + + - name: Compare against reference results shell: bash run: | - unzip dl/gnu-result.zip -d dl/ - unzip dl/gnu-test-report.zip -d dl/ - mv dl/gnu-result.json latest-gnu-result.json - python findutils/util/compare_gnu_result.py + mkdir -p comment + echo "${{ github.event.number }}" > comment/NR + REF="reference-gnu/findutils-gnu-full-result.json" + CUR="findutils-gnu-full-result.json" + : > comment/result-gnu.txt + if test ! -f "${CUR}"; then + echo "::error ::Missing current GNU results (${CUR}); failing early" + exit 1 + fi + if test -f "${REF}"; then + python3 findutils/util/compare_test_results.py \ + --ignore-file findutils/.github/workflows/ignore-intermittent.txt \ + --output comment/result-gnu.txt \ + "${CUR}" "${REF}" + else + echo "::warning ::No GNU reference results available yet; skipping comparison." 
+ fi + + - name: Upload GNU comparison comment + if: ${{ github.event_name == 'pull_request' && (success() || failure()) }} + uses: actions/upload-artifact@v7 + with: + name: comment-gnu + path: comment/ bfs-tests: name: Run BFS tests + permissions: + actions: read + contents: read runs-on: ubuntu-latest steps: - name: Checkout findutils @@ -152,110 +150,54 @@ jobs: export CARGO_INCREMENTAL=0 bash util/build-bfs.sh ||: - name: Upload bfs-test-report + if: success() || failure() uses: actions/upload-artifact@v7 with: name: bfs-test-report path: bfs/tests.log - - name: Upload bfs-result + - name: Upload bfs-full-result + if: success() || failure() uses: actions/upload-artifact@v7 with: - name: bfs-result - path: bfs-result.json - - name: Download artifacts (gnu-result and bfs-test-report) - uses: actions/github-script@v9 - with: - script: | - let fs = require('fs'); - fs.mkdirSync('${{ github.workspace }}/dl', { recursive: true }); - - async function downloadArtifact(artifactName) { - // List all artifacts from the workflow run - let artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{ github.run_id }}, - }); - - // Find the specified artifact - let matchArtifact = artifacts.data.artifacts.find((artifact) => artifact.name === artifactName); - if (!matchArtifact) { - throw new Error(`Artifact "${artifactName}" not found.`); - } + name: bfs-full-result + path: bfs-full-result.json + if-no-files-found: warn - // Download the artifact - let download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: 'zip', - }); - - // Save the artifact to a file - fs.writeFileSync(`${{ github.workspace }}/dl/${artifactName}.zip`, Buffer.from(download.data)); - } - - // Download the required artifacts - await downloadArtifact("bfs-result"); - await downloadArtifact("bfs-test-report"); - - name: Compare 
failing tests against main - shell: bash - run: | - ./findutils/util/diff-bfs.sh dl/tests.log bfs/tests.log - - name: Compare against main results + - name: Retrieve reference results + uses: dawidd6/action-download-artifact@v21 + continue-on-error: true + with: + workflow: compat.yml + branch: ${{ env.DEFAULT_BRANCH }} + workflow_conclusion: completed + name: bfs-full-result + path: reference-bfs + if_no_artifact_found: warn + + - name: Compare against reference results shell: bash run: | - unzip dl/bfs-result.zip -d dl/ - unzip dl/bfs-test-report.zip -d dl/ - mv dl/bfs-result.json latest-bfs-result.json - python findutils/util/compare_bfs_result.py - - upload-annotations: - name: Upload annotations - runs-on: ubuntu-latest - needs: [gnu-tests, bfs-tests] - if: ${{ github.event_name == 'pull_request' }} - - steps: - - name: List Annotations - uses: actions/github-script@v9 - - with: - script: | - let runs = await github.rest.checks.listForRef({ - owner: context.repo.owner, - repo: context.repo.repo, - ref: '${{ github.event.pull_request.head.sha }}' - }); - - let names = ['Run GNU findutils tests', 'Run BFS tests']; - let results = []; - runs.data.check_runs.filter(check => names.includes(check.name)).forEach(run => results.push(run)); - - let annotations = { data: [], pull_request_number: '${{ github.event.number }}' }; - for (let result of results) { - let run = await github.rest.checks.listAnnotations({ - owner: context.repo.owner, - repo: context.repo.repo, - check_run_id: result.id - }); - - run.data.forEach(data => { - annotations.data.push({ - run: result.name, - annotation: data - }); - }); - } - - // Remove duplicate items. 
- annotations.data = annotations.data.filter((value, index, self) => - self.findIndex(v => v.annotation.message === value.annotation.message) === index); - - let fs = require('fs'); - fs.writeFileSync('${{ github.workspace }}/annotations.json', JSON.stringify(annotations)); - - - name: Upload annotations + mkdir -p comment + echo "${{ github.event.number }}" > comment/NR + REF="reference-bfs/bfs-full-result.json" + CUR="bfs-full-result.json" + : > comment/result-bfs.txt + if test ! -f "${CUR}"; then + echo "::error ::Missing current bfs results (${CUR}); failing early" + exit 1 + fi + if test -f "${REF}"; then + python3 findutils/util/compare_test_results.py \ + --ignore-file findutils/.github/workflows/ignore-intermittent.txt \ + --output comment/result-bfs.txt \ + "${CUR}" "${REF}" + else + echo "::warning ::No bfs reference results available yet; skipping comparison." + fi + + - name: Upload BFS comparison comment + if: ${{ github.event_name == 'pull_request' && (success() || failure()) }} uses: actions/upload-artifact@v7 with: - name: comment - path: annotations.json + name: comment-bfs + path: comment/ diff --git a/.github/workflows/ignore-intermittent.txt b/.github/workflows/ignore-intermittent.txt new file mode 100644 index 00000000..ea74f603 --- /dev/null +++ b/.github/workflows/ignore-intermittent.txt @@ -0,0 +1,16 @@ +# List of intermittent test names to ignore when comparing GNU/bfs results. +# Format: one test name per line; lines starting with # are comments. +# +# A test listed here is still reported in the PR comment (marked +# "intermittent"), but a *new* failure of it will not fail the CI job. Use this +# for tests that are known to be flaky or environment-dependent, so genuine +# regressions in other tests stay actionable. +# +# Names must match those in the *-full-result.json files: +# * GNU dejagnu find tests: <name>.new-O[0-3] (e.g. printf.new-O0) +# * GNU dejagnu xargs tests: <name> (e.g. IARG, space) +# * GNU automake tests: tests/<dir>/<name> (e.g. 
tests/find/used) +# * bfs tests: <group>/<name> (e.g. posix/HL) +# +# Example: +# tests/find/some-flaky-test diff --git a/util/bfs_json_result.py b/util/bfs_json_result.py new file mode 100755 index 00000000..45735a3c --- /dev/null +++ b/util/bfs_json_result.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +""" +Build a per-test JSON summary of a bfs testsuite run. + +bfs writes one line per test to tests.log: + + [PASS] posix/H + [FAIL] common/newermt + [SKIP] gnu/... + +Output format matches util/gnu_json_result.py and is consumed by +compare_test_results.py: + + { + "summary": {"total": N, "passed": P, "failed": F, "skipped": S}, + "tests": [{"name": "...", "status": "PASS|FAIL|SKIP"}, ...] + } +""" + +import json +import re +import sys +from pathlib import Path + +RESULT_RE = re.compile(r"^\[(PASS|FAIL|SKIP)\] (\S+)\s*$") + + +def collect(log_file): + tests = {} + log = Path(log_file) + if log.is_file(): + for line in log.read_text(encoding="utf-8", errors="replace").splitlines(): + m = RESULT_RE.match(line) + if m: + tests[m.group(2)] = m.group(1) + return tests + + +def build(log_file): + tests = collect(log_file) + passed = sum(1 for s in tests.values() if s == "PASS") + failed = sum(1 for s in tests.values() if s == "FAIL") + skipped = sum(1 for s in tests.values() if s == "SKIP") + return { + "summary": { + "total": len(tests), + "passed": passed, + "failed": failed, + "skipped": skipped, + }, + "tests": [ + {"name": name, "status": status} + for name, status in sorted(tests.items()) + ], + } + + +def main(): + if len(sys.argv) != 3: + print(f"usage: {sys.argv[0]} ", file=sys.stderr) + return 2 + result = build(sys.argv[1]) + with open(sys.argv[2], "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, sort_keys=True) + f.write("\n") + s = result["summary"] + print( + f"bfs tests summary = TOTAL: {s['total']} / " + f"PASS: {s['passed']} / FAIL: {s['failed']} / SKIP: {s['skipped']}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/util/build-bfs.sh b/util/build-bfs.sh index c394112e..8f38a928 100755 --- a/util/build-bfs.sh +++ b/util/build-bfs.sh @@ -2,6 +2,9 @@ set -eo pipefail +# Repository root (where util/ lives), captured before we cd into the bfs tree. +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + if ! test -d ../bfs; then echo "Could not find ../bfs" echo "git clone https://github.com/tavianator/bfs.git" @@ -24,30 +27,18 @@ fi LOG_FILE=tests.log ./tests/tests.sh --bfs="$FIND" "$@" 2>&1 | tee "$LOG_FILE" || : -PASS=$(sed -En 's|^\[PASS] *([0-9]+) / .*|\1|p' "$LOG_FILE") -SKIP=$(sed -En 's|^\[SKIP] *([0-9]+) / .*|\1|p' "$LOG_FILE") -FAIL=$(sed -En 's|^\[FAIL] *([0-9]+) / .*|\1|p' "$LOG_FILE") +# Build a per-test JSON summary (name + status for every test) used by +# compare_test_results.py to detect per-test improvements/regressions. +RESULT_JSON="${RESULT_JSON:-../bfs-full-result.json}" +output="$(python3 "${REPO_DIR}/util/bfs_json_result.py" "$LOG_FILE" "${RESULT_JSON}")" +echo "${output}" -# Default any missing numbers to zero (e.g. 
no tests skipped) -: ${PASS:=0} -: ${SKIP:=0} -: ${FAIL:=0} +TOTAL=$(python3 -c "import json,sys;print(json.load(open(sys.argv[1]))['summary']['total'])" "${RESULT_JSON}") +FAIL=$(python3 -c "import json,sys;print(json.load(open(sys.argv[1]))['summary']['failed'])" "${RESULT_JSON}") -TOTAL=$((PASS + SKIP + FAIL)) if (( TOTAL <= 1 )); then echo "Error in the execution, failing early" exit 1 fi -output="BFS tests summary = TOTAL: $TOTAL / PASS: $PASS / SKIP: $SKIP / FAIL: $FAIL" -echo "${output}" if (( FAIL > 0 )); then echo "::warning ::${output}"; fi - -jq -n \ - --arg date "$(date --rfc-email)" \ - --arg sha "$GITHUB_SHA" \ - --arg total "$TOTAL" \ - --arg pass "$PASS" \ - --arg skip "$SKIP" \ - --arg fail "$FAIL" \ - '{($date): { sha: $sha, total: $total, pass: $pass, skip: $skip, fail: $fail, }}' > ../bfs-result.json diff --git a/util/build-gnu.sh b/util/build-gnu.sh index 960f6907..945ff9b5 100755 --- a/util/build-gnu.sh +++ b/util/build-gnu.sh @@ -2,6 +2,9 @@ set -e +# Repository root (where util/ lives), captured before we cd into the GNU tree. +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + if test ! 
-d ../findutils.gnu; then echo "Could not find ../findutils.gnu" echo "git clone https://git.savannah.gnu.org/git/findutils.git findutils.gnu" @@ -36,51 +39,18 @@ make check-TESTS $RUN_TEST || : make -C find/testsuite check || : make -C xargs/testsuite check || : -PASS=0 -SKIP=0 -FAIL=0 -XPASS=0 -ERROR=0 - -LOG_FILE=./find/testsuite/find.log -if test -f "$LOG_FILE"; then - ((PASS += $(sed -En 's/# of expected passes\s*//p' "$LOG_FILE"))) || : - ((FAIL += $(sed -En 's/# of unexpected failures\s*//p' "$LOG_FILE"))) || : -fi - -LOG_FILE=./xargs/testsuite/xargs.log -if test -f "$LOG_FILE"; then - ((PASS += $(sed -En 's/# of expected passes\s*//p' "$LOG_FILE"))) || : - ((FAIL += $(sed -En 's/# of unexpected failures\s*//p' "$LOG_FILE"))) || : -fi - -((TOTAL = PASS + FAIL)) || : +# Build a per-test JSON summary (name + status for every test) used by +# compare_test_results.py to detect per-test improvements/regressions. +RESULT_JSON="${RESULT_JSON:-../findutils-gnu-full-result.json}" +output="$(python3 "${REPO_DIR}/util/gnu_json_result.py" . 
"${RESULT_JSON}")" +echo "${output}" -LOG_FILE=./tests/test-suite.log -if test -f "$LOG_FILE"; then - ((TOTAL += $(sed -n "s/.*# TOTAL: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : - ((PASS += $(sed -n "s/.*# PASS: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : - ((SKIP += $(sed -n "s/.*# SKIP: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : - ((FAIL += $(sed -n "s/.*# FAIL: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : - ((XPASS += $(sed -n "s/.*# XPASS: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : - ((ERROR += $(sed -n "s/.*# ERROR: \(.*\)/\1/p" "$LOG_FILE" | tr -d '\r' | head -n1))) || : -fi +TOTAL=$(python3 -c "import json,sys;print(json.load(open(sys.argv[1]))['summary']['total'])" "${RESULT_JSON}") +FAIL=$(python3 -c "import json,sys;print(json.load(open(sys.argv[1]))['summary']['failed'])" "${RESULT_JSON}") if ((TOTAL <= 1)); then echo "Error in the execution, failing early" exit 1 fi -output="GNU tests summary = TOTAL: $TOTAL / PASS: $PASS / FAIL: $FAIL / ERROR: $ERROR" -echo "${output}" -if [[ "$FAIL" -gt 0 || "$ERROR" -gt 0 ]]; then echo "::warning ::${output}" ; fi -jq -n \ - --arg date "$(date --rfc-email)" \ - --arg sha "$GITHUB_SHA" \ - --arg total "$TOTAL" \ - --arg pass "$PASS" \ - --arg skip "$SKIP" \ - --arg fail "$FAIL" \ - --arg xpass "$XPASS" \ - --arg error "$ERROR" \ - '{($date): { sha: $sha, total: $total, pass: $pass, skip: $skip, fail: $fail, xpass: $xpass, error: $error, }}' > ../gnu-result.json +if [[ "$FAIL" -gt 0 ]]; then echo "::warning ::${output}"; fi diff --git a/util/compare_bfs_result.py b/util/compare_bfs_result.py deleted file mode 100644 index b96026bd..00000000 --- a/util/compare_bfs_result.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/python -""" -Compare the current results to the last results gathered from the main branch to highlight -if a PR is making the results better/worse -""" - -import json -import sys - -NEW = json.load(open("bfs-result.json")) -OLD = 
json.load(open("latest-bfs-result.json")) - -# Extract the specific results from the dicts -[last] = OLD.values() -[current] = NEW.values() - -pass_d = int(current["pass"]) - int(last["pass"]) -skip_d = int(current["skip"]) - int(last.get("skip", 0)) -fail_d = int(current["fail"]) - int(last["fail"]) - -# Get an annotation to highlight changes -print(f"::warning ::Changes from main: PASS {pass_d:+d} / SKIP {skip_d:+d} / FAIL {fail_d:+d}") - -# Check if there are no changes. -if pass_d == 0: - print("::warning ::BFS tests No changes") - -# If results are worse fail the job to draw attention -if pass_d < 0: - sys.exit(1) diff --git a/util/compare_gnu_result.py b/util/compare_gnu_result.py deleted file mode 100644 index 7e5e251e..00000000 --- a/util/compare_gnu_result.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/python -""" -Compare the current results to the last results gathered from the main branch to highlight -if a PR is making the results better/worse -""" - -import json -import sys - -NEW = json.load(open("gnu-result.json")) -OLD = json.load(open("latest-gnu-result.json")) - -# Extract the specific results from the dicts -last = OLD[list(OLD.keys())[0]] -current = NEW[list(NEW.keys())[0]] - -pass_d = int(current["pass"]) - int(last["pass"]) -fail_d = int(current["fail"]) - int(last["fail"]) -error_d = int(current["error"]) - int(last["error"]) -skip_d = int(current["skip"]) - int(last["skip"]) - -# Get an annotation to highlight changes -print( - f"::warning ::Changes from main: PASS {pass_d:+d} / FAIL {fail_d:+d} / ERROR {error_d:+d} / SKIP {skip_d:+d} " -) - -# Check if there are no changes. 
-if pass_d == 0: - print("::warning ::Gnu tests No changes") - -# If results are worse fail the job to draw attention -if pass_d < 0: - sys.exit(1) diff --git a/util/compare_test_results.py b/util/compare_test_results.py new file mode 100755 index 00000000..bb4be960 --- /dev/null +++ b/util/compare_test_results.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 + +""" +Compare the current GNU test results to the reference results gathered from the +main branch, to highlight whether a PR makes the results better or worse. + +Writes a human-readable comparison to --output (empty when nothing changed, so +the comment workflow can decide to stay silent). Exits 1 when there are new, +non-intermittent failures; intermittent (flaky) tests listed in --ignore-file +are reported but never fail the job. + +Adapted from the uutils sed/grep workflow. +""" + +import json +import sys +import argparse +from pathlib import Path + + +def load_ignore_list(ignore_file): + """Load the set of intermittent test names to ignore from a file.""" + ignore_set = set() + if ignore_file and Path(ignore_file).exists(): + with open(ignore_file, "r") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + ignore_set.add(line) + return ignore_set + + +def extract_test_results(json_data): + """Return (summary, failed_test_names) from parsed JSON data.""" + if not json_data or "summary" not in json_data: + return {"total": 0, "passed": 0, "failed": 0, "skipped": 0}, [] + + summary = json_data["summary"] + failed_tests = [ + test.get("name", "unknown") + for test in json_data.get("tests", []) + if test.get("status") == "FAIL" + ] + return summary, failed_tests + + +def compare_results(current_file, reference_file, ignore_file=None, output_file=None): + """Compare current results with reference results.""" + ignore_set = load_ignore_list(ignore_file) + + try: + with open(current_file, "r") as f: + current_summary, current_failed = extract_test_results(json.load(f)) + except 
Exception as e: + print(f"Error loading current results: {e}") + return 1 + + try: + with open(reference_file, "r") as f: + reference_summary, reference_failed = extract_test_results(json.load(f)) + except Exception as e: + print(f"Error loading reference results: {e}") + return 1 + + pass_diff = int(current_summary.get("passed", 0)) - int( + reference_summary.get("passed", 0) + ) + fail_diff = int(current_summary.get("failed", 0)) - int( + reference_summary.get("failed", 0) + ) + total_diff = int(current_summary.get("total", 0)) - int( + reference_summary.get("total", 0) + ) + + current_failed_set = set(current_failed) + reference_failed_set = set(reference_failed) + + new_failures = current_failed_set - reference_failed_set + improvements = reference_failed_set - current_failed_set + + non_intermittent_new_failures = new_failures - ignore_set + + no_changes = ( + pass_diff == 0 + and fail_diff == 0 + and total_diff == 0 + and not new_failures + and not improvements + ) + + # Empty output tells the comment workflow there is nothing to post. 
+ if no_changes: + if output_file: + with open(output_file, "w") as f: + f.write("") + return 0 + + output_lines = [] + output_lines.append("Test results comparison:") + output_lines.append( + f" Current: TOTAL: {current_summary.get('total', 0)} / PASSED: {current_summary.get('passed', 0)} / FAILED: {current_summary.get('failed', 0)} / SKIPPED: {current_summary.get('skipped', 0)}" + ) + output_lines.append( + f" Reference: TOTAL: {reference_summary.get('total', 0)} / PASSED: {reference_summary.get('passed', 0)} / FAILED: {reference_summary.get('failed', 0)} / SKIPPED: {reference_summary.get('skipped', 0)}" + ) + output_lines.append("") + + if pass_diff != 0 or fail_diff != 0 or total_diff != 0: + output_lines.append("Changes from main branch:") + output_lines.append(f" TOTAL: {total_diff:+d}") + output_lines.append(f" PASSED: {pass_diff:+d}") + output_lines.append(f" FAILED: {fail_diff:+d}") + output_lines.append("") + + if new_failures: + # Only non-intermittent failures fail the job, but list them all. 
+ real = sorted(new_failures - ignore_set) + flaky = sorted(new_failures & ignore_set) + output_lines.append(f"New test failures ({len(new_failures)}):") + for test in real: + output_lines.append(f" - {test}") + for test in flaky: + output_lines.append(f" - {test} (intermittent)") + output_lines.append("") + + if improvements: + output_lines.append(f"Test improvements ({len(improvements)}):") + for test in sorted(improvements): + output_lines.append(f" + {test}") + output_lines.append("") + + output_text = "\n".join(output_lines) + if output_file: + with open(output_file, "w") as f: + f.write(output_text) + else: + print(output_text) + + if non_intermittent_new_failures: + print( + f"ERROR: Found {len(non_intermittent_new_failures)} new non-intermittent test failures" + ) + return 1 + + return 0 + + +def main(): + parser = argparse.ArgumentParser(description="Compare GNU test results") + parser.add_argument("current", help="Current test results JSON file") + parser.add_argument("reference", help="Reference test results JSON file") + parser.add_argument( + "--ignore-file", help="File containing intermittent test names to ignore" + ) + parser.add_argument("--output", help="Output file for comparison results") + + args = parser.parse_args() + return compare_results(args.current, args.reference, args.ignore_file, args.output) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/util/diff-bfs.sh b/util/diff-bfs.sh deleted file mode 100755 index 4d821b91..00000000 --- a/util/diff-bfs.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -eu - -export LC_COLLATE=C - -# Extract the failing test lines from log files -failing_tests() { - sed -En 's/^\[FAIL\] (.*[a-z].*)/\1/p' "$1" | sort -} - -comm -3 <(failing_tests "$1") <(failing_tests "$2") | tr '\t' ',' | while IFS=, read old new; do - if [ -n "$old" ]; then - echo "::warning ::Congrats! The bfs test $old is now passing!" - fi - if [ -n "$new" ]; then - echo "::error ::bfs test failed: $new. 
$new is passing on 'main'. Maybe you have to rebase?" - fi -done diff --git a/util/diff-gnu.sh b/util/diff-gnu.sh deleted file mode 100755 index f29f358d..00000000 --- a/util/diff-gnu.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -eu - -export LC_COLLATE=C - -# Extract the failing test lines from log files -failing_tests() { - sed -En 's/FAIL: ([^,:]*)[,:].*/\1/p' "$1"/{tests,{find,xargs}/testsuite}/*.log | sort -} - -comm -3 <(failing_tests "$1") <(failing_tests "$2") | tr '\t' ',' | while IFS=, read old new foo; do - if [ -n "$old" ]; then - echo "::warning ::Congrats! The GNU test $old is now passing!" - fi - if [ -n "$new" ]; then - echo "::error ::GNU test failed: $new. $new is passing on 'main'. Maybe you have to rebase?" - fi -done diff --git a/util/gnu_json_result.py b/util/gnu_json_result.py new file mode 100755 index 00000000..4029caeb --- /dev/null +++ b/util/gnu_json_result.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 + +""" +Build a per-test JSON summary of a GNU findutils `make check` run. + +GNU findutils runs tests through three harnesses that each log results +differently: + + * dejagnu -> find/testsuite/find.log and xargs/testsuite/xargs.log + ("PASS: <name>" / "FAIL: <name>, <reason>" lines) + * automake -> tests/**/*.log, one log per test script, each ending with a + "<STATUS> tests/<dir>/<name>.sh (exit status: N)" line + +The names are naturally disjoint (dejagnu find names end in ".new-O[0-3]", +automake names start with "tests/"), so they can share one flat namespace. + +Output format (consumed by compare_test_results.py): + + { + "summary": {"total": N, "passed": P, "failed": F, "skipped": S}, + "tests": [{"name": "...", "status": "PASS|FAIL|SKIP"}, ...] + } +""" + +import json +import re +import sys +from pathlib import Path + +# dejagnu status -> normalized status. Anything unexpected counts as FAIL so a +# regression is never silently dropped. 
+DEJAGNU = { + "PASS": "PASS", + "XFAIL": "PASS", # expected failure: not a regression + "FAIL": "FAIL", + "XPASS": "FAIL", # unexpected pass: worth surfacing + "ERROR": "FAIL", + "UNRESOLVED": "FAIL", + "UNSUPPORTED": "SKIP", + "UNTESTED": "SKIP", +} + +DEJAGNU_RE = re.compile(r"^(PASS|XFAIL|FAIL|XPASS|ERROR|UNRESOLVED|UNSUPPORTED|UNTESTED): (.+)$") +# automake per-test trailer, e.g. "FAIL tests/find/used.sh (exit status: 1)" +AUTOMAKE_RE = re.compile(r"^(PASS|FAIL|SKIP|XPASS|XFAIL|ERROR) (tests/\S+?)(?:\.sh)? \(exit status: \d+\)$") +AUTOMAKE = { + "PASS": "PASS", + "XFAIL": "PASS", + "FAIL": "FAIL", + "XPASS": "FAIL", + "ERROR": "FAIL", + "SKIP": "SKIP", +} + + +def _read(path): + return path.read_text(encoding="utf-8", errors="replace").splitlines() + + +def _record(tests, name, status): + """Merge a status into `tests`, keeping failure sticky. + + DejaGnu emits one line per assertion, so a single test name can appear many + times with mixed results. A test counts as FAIL if any assertion failed, + else PASS if any passed, else SKIP. + """ + prev = tests.get(name) + if prev == "FAIL" or status == "FAIL": + tests[name] = "FAIL" + elif prev == "PASS" or status == "PASS": + tests[name] = "PASS" + else: + tests[name] = "SKIP" + + +def collect(root): + """Return {name: status} for every test found under `root`.""" + root = Path(root) + tests = {} + + # dejagnu logs + for rel in ("find/testsuite/find.log", "xargs/testsuite/xargs.log"): + log = root / rel + if not log.is_file(): + continue + for line in _read(log): + m = DEJAGNU_RE.match(line) + if not m: + continue + status, rest = m.group(1), m.group(2) + # FAIL lines carry a trailing ", <reason>"; the name is the head. 
+ name = rest.split(",", 1)[0].strip() + _record(tests, name, DEJAGNU[status]) + + # automake per-test logs (skip the aggregate test-suite.log) + for log in (root / "tests").rglob("*.log"): + if log.name == "test-suite.log": + continue + for line in _read(log): + m = AUTOMAKE_RE.match(line) + if m: + _record(tests, m.group(2), AUTOMAKE[m.group(1)]) + break + + return tests + + +def build(root): + tests = collect(root) + passed = sum(1 for s in tests.values() if s == "PASS") + failed = sum(1 for s in tests.values() if s == "FAIL") + skipped = sum(1 for s in tests.values() if s == "SKIP") + return { + "summary": { + "total": len(tests), + "passed": passed, + "failed": failed, + "skipped": skipped, + }, + "tests": [ + {"name": name, "status": status} + for name, status in sorted(tests.items()) + ], + } + + +def main(): + if len(sys.argv) != 3: + print(f"usage: {sys.argv[0]} ", file=sys.stderr) + return 2 + result = build(sys.argv[1]) + with open(sys.argv[2], "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, sort_keys=True) + f.write("\n") + s = result["summary"] + print( + f"GNU findutils tests summary = TOTAL: {s['total']} / " + f"PASS: {s['passed']} / FAIL: {s['failed']} / SKIP: {s['skipped']}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main())