diff --git a/.circleci/config.yml b/.circleci/config.yml index e097ce4ea0..7f7f01cb6d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -65,21 +65,24 @@ commands: jobs: test: - docker: - - image: cimg/go:1.25 - - image: redis:6.2 + # Use the machine executor so testcontainers-go has a local Docker daemon + # to drive — the redis testcontainer needs to be reachable from the test + # process via a host port, which the docker executor cannot provide. + machine: + image: ubuntu-2404:current resource_class: xlarge steps: - checkout - - restore_cache: - keys: - - v1-dockerize-{{ checksum "Makefile" }} - - v1-dockerize- - - run: make dockerize - - save_cache: - key: v1-dockerize-{{ checksum "Makefile" }} - paths: - - dockerize.tar.gz + - run: + name: Install Go and gotestsum + command: | + GO_VERSION=1.25.3 + curl -fsSL "https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz" -o /tmp/go.tar.gz + sudo rm -rf /usr/local/go + sudo tar -C /usr/local -xzf /tmp/go.tar.gz + echo 'export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH' >> "$BASH_ENV" + export PATH=/usr/local/go/bin:$HOME/go/bin:$PATH + go install gotest.tools/gotestsum@latest - restore_cache: keys: - v3-go-mod-{{ checksum "go.sum" }} diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e52fa0e62c..71031f1523 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @honeycombio/pipeline-team +* @honeycombio/agentic-observability diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a5e4aa310b..b323bc40c2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -11,8 +11,6 @@ updates: interval: "monthly" labels: - "type: dependencies" - reviewers: - - "honeycombio/pipeline-team" groups: minor-patch: update-types: diff --git a/CHANGELOG.md b/CHANGELOG.md index f10acfd4d8..610c996381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,58 @@ # Refinery Changelog +## 3.2.2 2026-05-26 + +### 🐛 Fixes + +- fix: validator exits non-zero on YAML parse errors in 
rules by @VinozzZ in https://github.com/honeycombio/refinery/pull/1820 +- fix: increment send_errors for network errors from request by @VinozzZ in https://github.com/honeycombio/refinery/pull/1823 +- fix: make sure FieldList is sorted before use it as dynsamplerKey by @VinozzZ in https://github.com/honeycombio/refinery/pull/1825 +- fix: overcounting dynsampler event_count and request_count by @VinozzZ in https://github.com/honeycombio/refinery/pull/1826 +- fix: copy fields slice before sorting in newTraceKey by @VinozzZ in https://github.com/honeycombio/refinery/pull/1827 + +### 🛠 Maintenance + +- maint: remove proto/otlp fork reference by @VinozzZ in https://github.com/honeycombio/refinery/pull/1822 + +## 3.2.1 2026-05-04 + +This release fixes a bug in OTLP JSON ingestion where `traceId` and `spanId` fields were incorrectly treated as base64-encoded. The OTLP JSON spec explicitly requires these fields to be hex-encoded strings, and clients sending data over OTLP HTTP/JSON would receive corrupted ID values as a result. 
+ +### 🛠 Maintenance + +- maint: update honeycombio/husky to v0.43.1 by @VinozzZ in https://github.com/honeycombio/refinery/pull/1816 +- maint(deps): bump the minor-patch group with 8 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1814 + +## 3.2.0 2026-04-13 + +### 💡 Enhancements + +- feat: add ReceiveKeyIDs config option for key ID-based authorization by @tdarwin in https://github.com/honeycombio/refinery/pull/1803 +- feat: add OTelMetrics.AdditionalAttributes config option by @tdarwin in https://github.com/honeycombio/refinery/pull/1804 +- feat: add granular event metrics by @tdarwin in https://github.com/honeycombio/refinery/pull/1805 + +### 🐛 Fixes + +- fix: include AdditionalErrorFields in logs for transmission code by @VinozzZ in https://github.com/honeycombio/refinery/pull/1807 + +### 🛠 Maintenance + +- fix: update ko build tooling and fix flaky integration test by @tdarwin in https://github.com/honeycombio/refinery/pull/1806 +- maint(deps): bump go.opentelemetry.io/otel/sdk from 1.42.0 to 1.43.0 by @dependabot in https://github.com/honeycombio/refinery/pull/1810 +- maint(deps): bump the minor-patch group across 1 directory with 12 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1812 + +## 3.1.2 2026-03-25 + +This release addresses security vulnerabilities CVE-2026-27139, CVE-2026-27142, and CVE-2026-25679. 
+ +### Features + +- feat: add capacity/limit companion metrics for queues and memory by @mterhar in https://github.com/honeycombio/refinery/pull/1799 + +### Maintenance + +- maint(deps): bump the minor-patch group across 1 directory with 21 updates by @dependabot in https://github.com/honeycombio/refinery/pull/1795 + ## 3.1.1 2026-02-25 ### Features diff --git a/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml b/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml index 34a4771aba..5c199c7226 100644 --- a/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml +++ b/LICENSES/github.com/hashicorp/go-version/.github/workflows/go-tests.yml @@ -1,6 +1,8 @@ name: go-tests -on: [push] +on: + pull_request: + branches: [ main ] env: TEST_RESULTS: /tmp/test-results @@ -11,16 +13,16 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: [ 1.15.3, 1.19 ] + go-version: ['stable', 'oldstable'] steps: - name: Setup go - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Create test directory run: | @@ -30,7 +32,7 @@ jobs: run: go mod download - name: Cache / restore go modules - uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 + uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4 with: path: | ~/go/pkg/mod @@ -50,7 +52,7 @@ jobs: fi - name: Run golangci-lint - uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 + uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # Install gotestsum with go get for 1.15.3; otherwise default to go install - name: Install gotestsum @@ -71,13 +73,13 @@ jobs: # Save coverage 
report parts - name: Upload and save artifacts - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f with: name: Test Results-${{matrix.go-version}} path: ${{ env.TEST_RESULTS }} - name: Upload coverage report - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f with: path: coverage.out name: Coverage-report-${{matrix.go-version}} diff --git a/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md b/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md index 6d48174bfb..81b423151c 100644 --- a/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md +++ b/LICENSES/github.com/hashicorp/go-version/CHANGELOG.md @@ -1,3 +1,41 @@ +# 1.9.0 (Mar 30, 2026) + +ENHANCEMENTS: + +Support parsing versions with custom prefixes via opt-in option in https://github.com/hashicorp/go-version/pull/79 + +INTERNAL: + +- Bump the github-actions-backward-compatible group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/179 +- Bump the github-actions-breaking group with 4 updates in https://github.com/hashicorp/go-version/pull/180 +- Bump the github-actions-backward-compatible group with 3 updates in https://github.com/hashicorp/go-version/pull/182 +- Update GitHub Actions to trigger on pull requests and update go version in https://github.com/hashicorp/go-version/pull/185 +- Bump actions/upload-artifact from 6.0.0 to 7.0.0 in the github-actions-breaking group across 1 directory in https://github.com/hashicorp/go-version/pull/183 +- Bump the github-actions-backward-compatible group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/186 + +# 1.8.0 (Nov 28, 2025) + +ENHANCEMENTS: + +- Add benchmark test for version.String() in https://github.com/hashicorp/go-version/pull/159 +- Bytes implementation in https://github.com/hashicorp/go-version/pull/161 + 
+INTERNAL: + +- Add CODEOWNERS file in .github/CODEOWNERS in https://github.com/hashicorp/go-version/pull/145 +- Linting in https://github.com/hashicorp/go-version/pull/151 +- Correct typos in comments in https://github.com/hashicorp/go-version/pull/134 +- Migrate GitHub Actions updates from TSCCR to Dependabot in https://github.com/hashicorp/go-version/pull/155 +- Bump the github-actions-backward-compatible group with 2 updates in https://github.com/hashicorp/go-version/pull/157 +- Update doc reference in README in https://github.com/hashicorp/go-version/pull/135 +- Bump the github-actions-breaking group with 3 updates in https://github.com/hashicorp/go-version/pull/156 +- [Compliance] - PR Template Changes Required in https://github.com/hashicorp/go-version/pull/158 +- Bump actions/cache from 4.2.3 to 4.2.4 in the github-actions-backward-compatible group in https://github.com/hashicorp/go-version/pull/167 +- Bump actions/checkout from 4.2.2 to 5.0.0 in the github-actions-breaking group in https://github.com/hashicorp/go-version/pull/166 +- Bump the github-actions-breaking group across 1 directory with 2 updates in https://github.com/hashicorp/go-version/pull/171 +- [IND-4226] [COMPLIANCE] Update Copyright Headers in https://github.com/hashicorp/go-version/pull/172 +- drop init() in https://github.com/hashicorp/go-version/pull/175 + # 1.7.0 (May 24, 2024) ENHANCEMENTS: diff --git a/LICENSES/github.com/hashicorp/go-version/README.md b/LICENSES/github.com/hashicorp/go-version/README.md index 83a8249f72..5528960215 100644 --- a/LICENSES/github.com/hashicorp/go-version/README.md +++ b/LICENSES/github.com/hashicorp/go-version/README.md @@ -34,6 +34,32 @@ if v1.LessThan(v2) { } ``` +#### Version Parsing and Comparison with Prefixes + +The library also supports parsing versions with a custom prefix. +Using the `WithPrefix` option, you can specify a prefix to strip +before parsing the version. 
+ +Use `WithPrefix` when your input strings carry a known release prefix such as +`deployment-`, `controller-`, etc. + +After parsing, the prefix is not part of the canonical version value. This +means the regular comparison methods such as `Compare`, `LessThan`, `Equal`, +and `GreaterThan` compare only the stripped version. If you compare versions +from different prefixes with these methods, the prefixes are ignored. If you +need to reject cross-prefix comparisons, inspect the parsed prefixes before +comparing the versions. + +```go +v1, _ := version.NewVersion("deployment-v1.2.3-beta+metadata", version.WithPrefix("deployment-")) +v2, _ := version.NewVersion("deployment-v1.2.4", version.WithPrefix("deployment-")) + +if v1.LessThan(v2) { + fmt.Printf("%s (%s) is less than %s (%s)\n", v1, v1.Original(), v2, v2.Original()) + // Outputs: 1.2.3-beta+metadata (deployment-v1.2.3-beta+metadata) is less than 1.2.4 (deployment-v1.2.4) +} +``` + #### Version Constraints ```go diff --git a/LICENSES/github.com/hashicorp/go-version/version.go b/LICENSES/github.com/hashicorp/go-version/version.go index 17b29732ee..b95503d3cf 100644 --- a/LICENSES/github.com/hashicorp/go-version/version.go +++ b/LICENSES/github.com/hashicorp/go-version/version.go @@ -49,6 +49,23 @@ const ( `?` ) +// Optional options for NewVersion function. +type options struct { + // If set, this prefix will be trimmed from the version string before parsing. + prefix string +} + +// Option is a functional option for NewVersion. +type Option func(*options) + +// WithPrefix is a functional option that sets a prefix to be removed from the +// version string before parsing. +func WithPrefix(prefix string) Option { + return func(o *options) { + o.prefix = prefix + } +} + // Version represents a single version. type Version struct { metadata string @@ -56,12 +73,36 @@ type Version struct { segments []int64 si int original string + prefix string } -// NewVersion parses the given version and returns a new -// Version. 
-func NewVersion(v string) (*Version, error) { - return newVersion(v, getVersionRegexp()) +// NewVersion parses the given version and returns a new Version. +// +// Optional parsing behavior can be enabled with Option values such as +// WithPrefix, which validates and strips an expected prefix before parsing. +func NewVersion(v string, opts ...Option) (*Version, error) { + options := &options{} + for _, opt := range opts { + if opt != nil { + opt(options) + } + } + + vToParse := v + if options.prefix != "" { + if !strings.HasPrefix(v, options.prefix) { + return nil, fmt.Errorf("version %q does not have prefix %q", v, options.prefix) + } + vToParse = strings.TrimPrefix(v, options.prefix) + } + + ver, err := newVersion(vToParse, getVersionRegexp()) + if err != nil { + return nil, err + } + ver.prefix = options.prefix + ver.original = v + return ver, nil } // NewSemver parses the given version and returns a new @@ -424,6 +465,11 @@ func (v *Version) Original() string { return v.original } +// Prefix returns the explicit prefix used with WithPrefix, if any. +func (v *Version) Prefix() string { + return v.prefix +} + // UnmarshalText implements encoding.TextUnmarshaler interface. 
func (v *Version) UnmarshalText(b []byte) error { temp, err := NewVersion(string(b)) diff --git a/LICENSES/github.com/hashicorp/go-version/version_test.go b/LICENSES/github.com/hashicorp/go-version/version_test.go index 15a062324f..8da634559b 100644 --- a/LICENSES/github.com/hashicorp/go-version/version_test.go +++ b/LICENSES/github.com/hashicorp/go-version/version_test.go @@ -39,6 +39,8 @@ func TestNewVersion(t *testing.T) { {"1.7rc2", false}, {"v1.7rc2", false}, {"1.0-", false}, + {"controller-v0.40.2", true}, + {"azure-cli-v1.4.2", true}, } for _, tc := range cases { @@ -51,6 +53,33 @@ func TestNewVersion(t *testing.T) { } } +func TestNewVersionWithPrefix(t *testing.T) { + cases := []struct { + version string + prefix string + err bool + }{ + {"", "release-", true}, + {"rel-1.2.3", "release-", true}, + {"release_1.2.3", "release-", true}, + {"release_1.2.0-x.Y.0+metadata", "release_", false}, + {"release-1.2.0-x.Y.0+metadata-width-hyphen", "release-", false}, + {"myrelease-1.2.3-rc1-with-hyphen", "myrelease-", false}, + {"prefix-1.2.3.4", "prefix-", false}, + {"controller-v0.40.2", "controller-", false}, + {"azure-cli-v1.4.2", "azure-cli-", false}, + } + + for _, tc := range cases { + _, err := NewVersion(tc.version, WithPrefix(tc.prefix)) + if tc.err && err == nil { + t.Fatalf("expected error for version: %q", tc.version) + } else if !tc.err && err != nil { + t.Fatalf("error for version %q: %s", tc.version, err) + } + } +} + func TestNewSemver(t *testing.T) { cases := []struct { version string @@ -80,6 +109,8 @@ func TestNewSemver(t *testing.T) { {"1.7rc2", true}, {"v1.7rc2", true}, {"1.0-", true}, + {"controller-v0.40.2", true}, + {"azure-cli-v1.4.2", true}, } for _, tc := range cases { @@ -171,6 +202,107 @@ func TestVersionCompare(t *testing.T) { } } +func TestVersionCompareWithPrefix(t *testing.T) { + cases := []struct { + v1 string + v1Prefix string + v2 string + v2Prefix string + expected int + }{ + {"controller-v0.40.2", "controller-", 
"controller-v0.40.3", "controller-", -1}, + {"0.40.4", "", "controller-v0.40.2", "controller-", 1}, + {"0.40.4", "", "controller-v0.40.4", "controller-", 0}, + {"azure-cli-v1.4.2", "azure-cli-", "azure-cli-v1.4.2", "azure-cli-", 0}, + {"azure-cli-v1.4.1", "azure-cli-", "azure-cli-v1.4.2", "azure-cli-", -1}, + {"1.4.3", "", "azure-cli-v1.4.2", "azure-cli-", 1}, + {"v1.4.3", "", "azure-cli-v1.4.2", "azure-cli-", 1}, + {"controller-v1.4.1", "controller-", "azure-cli-v1.4.2", "azure-cli-", -1}, + } + + for _, tc := range cases { + var v1 *Version + var err error + if tc.v1Prefix != "" { + v1, err = NewVersion(tc.v1, WithPrefix(tc.v1Prefix)) + } else { + v1, err = NewVersion(tc.v1) + } + if err != nil { + t.Fatalf("err: %s", err) + } + + var v2 *Version + if tc.v2Prefix != "" { + v2, err = NewVersion(tc.v2, WithPrefix(tc.v2Prefix)) + } else { + v2, err = NewVersion(tc.v2) + } + if err != nil { + t.Fatalf("err: %s", err) + } + + actual := v1.Compare(v2) + expected := tc.expected + if actual != expected { + t.Fatalf( + "%s <=> %s\nexpected: %d\nactual: %d", + tc.v1, tc.v2, + expected, actual) + } + } +} + +func TestVersionAccessorsWithPrefix(t *testing.T) { + v, err := NewVersion("controller-v1.2.0-beta.2+build.5", WithPrefix("controller-")) + if err != nil { + t.Fatalf("err: %s", err) + } + + if got := v.Prefix(); got != "controller-" { + t.Fatalf("expected prefix %q, got %q", "controller-", got) + } + + if got := v.Original(); got != "controller-v1.2.0-beta.2+build.5" { + t.Fatalf("expected original %q, got %q", "controller-v1.2.0-beta.2+build.5", got) + } + + if got := v.String(); got != "1.2.0-beta.2+build.5" { + t.Fatalf("expected string %q, got %q", "1.2.0-beta.2+build.5", got) + } + + if got := v.Metadata(); got != "build.5" { + t.Fatalf("expected metadata %q, got %q", "build.5", got) + } + + if got := v.Prerelease(); got != "beta.2" { + t.Fatalf("expected prerelease %q, got %q", "beta.2", got) + } + + expectedSegments := []int{1, 2, 0} + if got := v.Segments(); 
!reflect.DeepEqual(got, expectedSegments) { + t.Fatalf("expected segments %#v, got %#v", expectedSegments, got) + } + + expectedSegments64 := []int64{1, 2, 0} + if got := v.Segments64(); !reflect.DeepEqual(got, expectedSegments64) { + t.Fatalf("expected segments64 %#v, got %#v", expectedSegments64, got) + } +} + +func TestVersionSegmentsWithPrefix(t *testing.T) { + v, err := NewVersion("azure-cli-v1.4.2", WithPrefix("azure-cli-")) + if err != nil { + t.Fatalf("err: %s", err) + } + + expected := []int{1, 4, 2} + actual := v.Segments() + if !reflect.DeepEqual(actual, expected) { + t.Fatalf("expected: %#v\nactual: %#v", expected, actual) + } +} + func TestVersionCompare_versionAndSemver(t *testing.T) { cases := []struct { versionRaw string diff --git a/LICENSES/github.com/dgryski/go-rendezvous/LICENSE b/LICENSES/go.uber.org/atomic/LICENSE.txt similarity index 92% rename from LICENSES/github.com/dgryski/go-rendezvous/LICENSE rename to LICENSES/go.uber.org/atomic/LICENSE.txt index 22080f736a..8765c9fbc6 100644 --- a/LICENSES/github.com/dgryski/go-rendezvous/LICENSE +++ b/LICENSES/go.uber.org/atomic/LICENSE.txt @@ -1,6 +1,4 @@ -The MIT License (MIT) - -Copyright (c) 2017-2020 Damian Gryski +Copyright (c) 2016 Uber Technologies, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 189fcd6ccf..4226762e68 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ test: test_with_race test_all .PHONY: test_with_race #: run only tests tagged with potential race conditions -test_with_race: test_results wait_for_redis +test_with_race: test_results @echo @echo "+++ testing - race conditions?" 
@echo @@ -18,7 +18,7 @@ test_with_race: test_results wait_for_redis .PHONY: test_all #: run all tests, but with no race condition detection -test_all: test_results wait_for_redis +test_all: test_results @echo @echo "+++ testing - all the tests" @echo @@ -34,19 +34,10 @@ local_image: ko crane ./build-docker.sh docker tag $$(docker images ko.local/refinery --quiet | head -1) ko.local/refinery:local -.PHONY: wait_for_redis -# wait for Redis to become available for test suite -wait_for_redis: dockerize - @echo - @echo "+++ We need a Redis running to run the tests." - @echo - @echo "Checking with dockerize $(shell ./dockerize --version)" - @./dockerize -wait tcp://localhost:6379 -timeout 30s - # You can override this version from an environment variable. HOST_OS := $(shell uname -s | tr A-Z a-z) # You can override this version from an environment variable. -KO_VERSION ?= 0.11.2 +KO_VERSION ?= 0.18.0 KO_RELEASE_ASSET := ko_${KO_VERSION}_${HOST_OS}_x86_64.tar.gz # ensure the ko command is available ko: ko_${KO_VERSION}.tar.gz @@ -109,7 +100,7 @@ DOCKERIZE_RELEASE_ASSET := dockerize-${HOST_OS}-amd64-${DOCKERIZE_VERSION}.tar.g dockerize.tar.gz: @echo - @echo "+++ Retrieving dockerize tool for Redis readiness check." + @echo "+++ Retrieving dockerize tool for service readiness checks." @echo # make sure that file is available ifeq (, $(shell command -v file)) @@ -189,7 +180,3 @@ unsmoke: @echo "+++ Spinning down the smokers." @echo "" cd smoke-test && docker-compose down --volumes - - - - diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 2e25ca23f1..8b05b3241c 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,46 @@ While [CHANGELOG.md](./CHANGELOG.md) contains detailed documentation and links to all the source code changes in a given release, this document is intended to be aimed at a more comprehensible version of the contents of the release from the point of view of users of Refinery. 
+## Version 3.2.2 + +This release fixes dynamic sampling correctness and metrics accuracy when multiple collector workers are enabled. + +### Fixes + +* **Throughput sampler correctness**: Fixed throughput targets not being met. If you set `WorkerCount` to `1` as a workaround, you can now remove that override. +* **Dynsampler metrics accuracy**: Fixed `event_count` and `request_count` being reported higher than actual throughput. +* Fixed `send_errors` not being incremented for network-level transmission errors. +* Fixed the config validator not exiting with a non-zero code on YAML parse errors in rules files. + +## Version 3.2.1 + +This release fixes a bug where trace and span IDs were corrupted for clients sending data over OTLP HTTP/JSON. + +## Version 3.2.0 + +This release adds new configuration options for authorization and observability. + +### Configuration Changes + +* **Added**: `AccessKeys.ReceiveKeyIDs` - authorizes incoming traffic by Honeycomb ingest key IDs (obtained from the `/1/auth` endpoint) instead of requiring full API keys. Supports live reload alongside the existing `ReceiveKeys` option. +* **Added**: `OTelMetrics.AdditionalAttributes` - injects custom resource attributes (e.g., cluster ID, environment name) into all OTLP metrics emitted by Refinery. Supplied as comma-separated `key:value` pairs. + +### New Metrics + +* `events_dropped` - Counter tracking the number of events dropped by Refinery. + +### Fixes + +* Fixed `AdditionalErrorFields` not being included in transmission error logs. + +## Version 3.1.2 + +This patch release primarily addresses security vulnerabilities in dependencies. + +### Maintenance + +- Updated dependencies to address security vulnerabilities CVE-2026-27139, CVE-2026-27142, and CVE-2026-25679. + ## Version 3.1.1 This patch release includes bug fixes and a new feature for configuring additional HTTP headers. 
diff --git a/agent/agent.go b/agent/agent.go index f408d8bfd6..736598dd7d 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -228,6 +228,22 @@ func (agent *Agent) healthCheck() { agent.usageTracker.Add(signal_traces, traceUsage) agent.usageTracker.Add(signal_logs, logUsage) + + var eventsReceived float64 + if v, ok := agent.metrics.Get("incoming_router_span"); ok { + eventsReceived += v + } + if v, ok := agent.metrics.Get("incoming_router_nonspan_event"); ok { + eventsReceived += v + } + if v, ok := agent.metrics.Get("incoming_router_event"); ok { + eventsReceived += v + } + agent.usageTracker.Add(signal_events_received, eventsReceived) + + if eventsDropped, ok := agent.metrics.Get("events_dropped"); ok { + agent.usageTracker.Add(signal_events_dropped, eventsDropped) + } } } } diff --git a/agent/otlp_metrics.go b/agent/otlp_metrics.go index 7e9e8140e4..b5e275925c 100644 --- a/agent/otlp_metrics.go +++ b/agent/otlp_metrics.go @@ -9,9 +9,22 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" ) +type metricMapping struct { + metricName string + signal string +} + +var signalToMetric = map[usageSignal]metricMapping{ + signal_traces: {metricName: "bytes_received", signal: "traces"}, + signal_logs: {metricName: "bytes_received", signal: "logs"}, + signal_events_received: {metricName: "events_received", signal: ""}, + signal_events_dropped: {metricName: "events_dropped", signal: ""}, +} + type otlpMetrics struct { metrics pmetric.Metrics - ms pmetric.Sum + sums map[string]pmetric.Sum + sm pmetric.ScopeMetrics } func newOTLPMetrics(serviceName, version, hostname string) *otlpMetrics { @@ -22,25 +35,42 @@ func newOTLPMetrics(serviceName, version, hostname string) *otlpMetrics { resourceAttrs.PutStr("service.version", version) resourceAttrs.PutStr("host.name", hostname) sm := rm.ScopeMetrics().AppendEmpty() - ms := sm.Metrics().AppendEmpty() - ms.SetName("bytes_received") - sum := ms.SetEmptySum() - sum.SetAggregationTemporality(pmetric.AggregationTemporalityDelta) 
return &otlpMetrics{ metrics: metrics, - ms: sum, + sums: make(map[string]pmetric.Sum), + sm: sm, + } +} + +func (om *otlpMetrics) getOrCreateSum(metricName string) pmetric.Sum { + if sum, ok := om.sums[metricName]; ok { + return sum } + ms := om.sm.Metrics().AppendEmpty() + ms.SetName(metricName) + sum := ms.SetEmptySum() + sum.SetAggregationTemporality(pmetric.AggregationTemporalityDelta) + om.sums[metricName] = sum + return sum } func (om *otlpMetrics) addOTLPSum(timestamp time.Time, value float64, signal usageSignal) error { + mapping, ok := signalToMetric[signal] + if !ok { + return fmt.Errorf("unknown usage signal: %s", signal) + } + intVal, err := convertFloat64ToInt64(value) if err != nil { return err } - d := om.ms.DataPoints().AppendEmpty() + sum := om.getOrCreateSum(mapping.metricName) + d := sum.DataPoints().AppendEmpty() d.SetTimestamp(pcommon.NewTimestampFromTime(timestamp)) d.SetIntValue(intVal) - d.Attributes().PutStr("signal", string(signal)) + if mapping.signal != "" { + d.Attributes().PutStr("signal", mapping.signal) + } return nil } diff --git a/agent/usage_report.go b/agent/usage_report.go index 0d947a021c..f48e4e333b 100644 --- a/agent/usage_report.go +++ b/agent/usage_report.go @@ -89,6 +89,8 @@ func (ur *usageTracker) completeSend() { type usageSignal string var ( - signal_traces usageSignal = "traces" - signal_logs usageSignal = "logs" + signal_traces usageSignal = "traces" + signal_logs usageSignal = "logs" + signal_events_received usageSignal = "events_received" + signal_events_dropped usageSignal = "events_dropped" ) diff --git a/app/app_test.go b/app/app_test.go index 1ac3a737cd..cc4bd18788 100644 --- a/app/app_test.go +++ b/app/app_test.go @@ -34,6 +34,7 @@ import ( "github.com/honeycombio/refinery/config" "github.com/honeycombio/refinery/internal/health" "github.com/honeycombio/refinery/internal/peer" + "github.com/honeycombio/refinery/internal/redistest" "github.com/honeycombio/refinery/logger" 
"github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -235,11 +236,11 @@ func (w *countingTransmission) waitForCount(t testing.TB, n int) { // each test gets a unique port and redisDB. // // by default, every Redis instance supports 16 databases, we use redisDB as a way to separate test data -func defaultConfig(basePort int, redisDB int, apiURL string) *config.MockConfig { - return defaultConfigWithGRPC(basePort, redisDB, apiURL, false) +func defaultConfig(t testing.TB, basePort int, redisDB int, apiURL string) *config.MockConfig { + return defaultConfigWithGRPC(t, basePort, redisDB, apiURL, false) } -func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC bool) *config.MockConfig { +func defaultConfigWithGRPC(t testing.TB, basePort int, redisDB int, apiURL string, enableGRPC bool) *config.MockConfig { if redisDB >= 16 { panic("redisDB must be less than 16") } @@ -247,6 +248,8 @@ func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC apiURL = "http://api.honeycomb.io" } + redisHost, redisPort := redistest.Endpoint(t) + cfg := &config.MockConfig{ GetTracesConfigVal: config.TracesConfig{ SendTicker: config.Duration(2 * time.Millisecond), @@ -258,6 +261,7 @@ func defaultConfigWithGRPC(basePort int, redisDB int, apiURL string, enableGRPC AddRuleReasonToTrace: true, PeerManagementType: "redis", GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ + Host: redisHost + ":" + redisPort, Prefix: "refinery-app-test", Timeout: config.Duration(1 * time.Second), Database: redisDB, @@ -382,10 +386,19 @@ func newStartedApp( assert.NoError(t, err) err = startstop.Start(g.Objects(), nil) - assert.NoError(t, err) + require.NoError(t, err) + + // Wait for the HTTP server to be ready by polling the listen address. 
+ listenAddr := c.GetListenAddr() + require.Eventually(t, func() bool { + conn, err := net.DialTimeout("tcp", listenAddr, 50*time.Millisecond) + if err != nil { + return false + } + conn.Close() + return true + }, 2*time.Second, 10*time.Millisecond, "server failed to start listening on %s", listenAddr) - // Racy: wait just a moment for ListenAndServe to start up. - time.Sleep(15 * time.Millisecond) return &a, g } @@ -432,7 +445,7 @@ func TestAppIntegration(t *testing.T) { redisDB := 2 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) app, graph := newStartedApp(t, nil, nil, cfg) // Send a root span, it should be sent in short order. @@ -679,7 +692,7 @@ func TestAppIntegrationSendKey(t *testing.T) { redisDB := 1 + i testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) cfg.GetAccessKeyConfigVal = config.AccessKeyConfig{ SendKey: tt.sendKey, SendKeyMode: tt.sendKeyMode, @@ -893,7 +906,7 @@ func TestAppIntegrationWithNonLegacyKey(t *testing.T) { redisDB := 3 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) a, graph := newStartedApp(t, nil, nil, cfg) a.IncomingRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) a.PeerRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) @@ -933,7 +946,7 @@ func TestAppIntegrationEmptyEvent(t *testing.T) { port := 19010 redisDB := 8 - cfg := defaultConfig(port, redisDB, "") + cfg := defaultConfig(t, port, redisDB, "") _, graph := newStartedApp(t, nil, nil, cfg) tt := []struct { @@ -996,7 +1009,7 @@ func TestPeerRouting(t *testing.T) { senders[i] = &transmit.MockTransmission{} peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 
5 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") apps[i], graph = newStartedApp(t, senders[i], peers, cfg) defer startstop.Stop(graph.Objects(), nil) @@ -1071,7 +1084,7 @@ func TestHostMetadataSpanAdditions(t *testing.T) { redisDB := 7 testServer := newTestAPIServer(t) - cfg := defaultConfig(port, redisDB, testServer.server.URL) + cfg := defaultConfig(t, port, redisDB, testServer.server.URL) cfg.AddHostMetadataToTrace = true app, graph := newStartedApp(t, nil, nil, cfg) @@ -1125,7 +1138,7 @@ func TestEventsEndpoint(t *testing.T) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 8 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") apps[i], graph = newStartedApp(t, senders[i], peers, cfg) defer startstop.Stop(graph.Objects(), nil) } @@ -1221,7 +1234,7 @@ func TestEventsEndpointWithNonLegacyKey(t *testing.T) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 10 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := defaultConfig(t, basePort, redisDB, "") app, graph := newStartedApp(t, senders[i], peers, cfg) app.IncomingRouter.SetEnvironmentCache(time.Second, func(s string) (string, error) { return "test", nil }) @@ -1309,7 +1322,7 @@ func TestOTLPProtobufIntegration(t *testing.T) { redisDB := 14 testServer := newTestAPIServer(t) - cfg := defaultConfigWithGRPC(port, redisDB, testServer.server.URL, true) + cfg := defaultConfigWithGRPC(t, port, redisDB, testServer.server.URL, true) app, graph := newStartedApp(t, nil, nil, cfg) // Create OTLP protobuf request @@ -1412,7 +1425,7 @@ func TestOTLPGRPCConcurrency(t *testing.T) { redisDB := 15 testServer := newTestAPIServer(t) - cfg := defaultConfigWithGRPC(port, redisDB, testServer.server.URL, true) + cfg := defaultConfigWithGRPC(t, port, redisDB, testServer.server.URL, true) _, graph := newStartedApp(t, nil, nil, cfg) // Connect to gRPC server @@ -1651,7 +1664,7 @@ func 
createBenchmarkOTLPRequest() *collectortrace.ExportTraceServiceRequest { func BenchmarkTracesOTLP(b *testing.B) { sender := &countingTransmission{} redisDB := 15 - cfg := defaultConfigWithGRPC(18000, redisDB, "", true) + cfg := defaultConfigWithGRPC(b, 18000, redisDB, "", true) _, graph := newStartedApp(b, sender, nil, cfg) defer func() { err := startstop.Stop(graph.Objects(), nil) @@ -1764,7 +1777,7 @@ func BenchmarkTracesOTLP(b *testing.B) { func BenchmarkTraces(b *testing.B) { sender := &countingTransmission{} redisDB := 1 - cfg := defaultConfig(11000, redisDB, "") + cfg := defaultConfig(b, 11000, redisDB, "") _, graph := newStartedApp(b, sender, nil, cfg) defer func() { err := startstop.Stop(graph.Objects(), nil) @@ -1804,8 +1817,8 @@ func BenchmarkTraces(b *testing.B) { } // createRulesBasedConfig creates a mock config with rules-based sampler containing downstream samplers -func createRulesBasedConfig(port, redisDB int, apiURL string, throughputGoal int) *config.MockConfig { - cfg := defaultConfig(port, redisDB, apiURL) +func createRulesBasedConfig(t testing.TB, port, redisDB int, apiURL string, throughputGoal int) *config.MockConfig { + cfg := defaultConfig(t, port, redisDB, apiURL) // Configure rules-based sampler with selective rules cfg.GetSamplerTypeVal = &config.RulesBasedSamplerConfig{ @@ -1890,7 +1903,7 @@ func TestRulesBasedSamplerWithDownstreamAndClusterChanges(t *testing.T) { // Phase 1: Initial setup with single-node cluster mockPeers := peer.NewMockPeers([]string{"http://localhost:20001"}, "http://localhost:20001") - cfg := createRulesBasedConfig(port, redisDB, testServer.server.URL, 100) + cfg := createRulesBasedConfig(t, port, redisDB, testServer.server.URL, 100) _, graph := newStartedApp(t, nil, mockPeers, cfg) defer startstop.Stop(graph.Objects(), nil) @@ -2180,7 +2193,7 @@ func BenchmarkDistributedTraces(b *testing.B) { peers := peer.NewMockPeers(peerList, peerList[i]) redisDB := 2 + i - cfg := defaultConfig(basePort, redisDB, "") + cfg := 
defaultConfig(b, basePort, redisDB, "") apps[i], graph = newStartedApp(b, sender, peers, cfg) defer startstop.Stop(graph.Objects(), nil) diff --git a/build-docker.sh b/build-docker.sh index e47c388573..f1b9f8137d 100755 --- a/build-docker.sh +++ b/build-docker.sh @@ -3,101 +3,52 @@ set -o nounset set -o pipefail set -o xtrace -### Versioning and image tagging ### -# -# Three build scenarios: -# 1. CI release build: triggered by git tag -# - Stable (vX.Y.Z): tagged with major, minor, patch, and "latest" -# - Pre-release (vX.Y.Z-suffix): tagged only with exact version -# 2. CI branch build: version + CI job ID, tagged with branch name (+ "latest" if main) -# 3. Local build: version from git describe, tagged with that version - -# Get version info from git (used by branch and local builds) -# --tags: use any tag, not just annotated ones -# --match='v[0-9]*': only version tags (starts with v and a digit) -# --always: fall back to commit ID if no tag found -# e.g., v2.1.1-45-ga1b2c3d means commit a1b2c3d, 45 commits ahead of tag v2.1.1 -VERSION_FROM_GIT=$(git describe --tags --match='v[0-9]*' --always) - -if [[ -n "${CIRCLE_TAG:-}" ]]; then - # Release build (triggered by git tag) - VERSION=${CIRCLE_TAG#"v"} - - if [[ "${CIRCLE_TAG}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - # Stable release: tag with major, minor, patch, and latest - # e.g., v2.1.1 -> "2", "2.1", "2.1.1", "latest" - MAJOR_VERSION=${VERSION%%.*} - MINOR_VERSION=${VERSION%.*} - TAGS="$MAJOR_VERSION,$MINOR_VERSION,$VERSION,latest" - else - # Pre-release: only the exact version tag - # e.g., v3.0.0-rc1 -> "3.0.0-rc1" - TAGS="$VERSION" - fi - -elif [[ -n "${CIRCLE_BRANCH:-}" ]]; then - # CI branch build - # Version from git describe + CI job ID - # e.g., 2.1.1-45-ga1b2c3d-ci8675309 - VERSION="${VERSION_FROM_GIT#'v'}-ci${CIRCLE_BUILD_NUM}" - BRANCH_TAG=${CIRCLE_BRANCH//\//-} - TAGS="${VERSION},branch-${BRANCH_TAG}" - - # Main branch builds are tagged "latest" in the private registry - if [[ "${CIRCLE_BRANCH}" 
== "main" ]]; then - TAGS+=",latest" - fi - -else - # Local build - # Version from git describe only - # e.g., 2.1.1-45-ga1b2c3d - VERSION=${VERSION_FROM_GIT#'v'} - TAGS="${VERSION}" -fi - -GIT_COMMIT=${CIRCLE_SHA1:-$(git rev-parse HEAD)} +GCLOUD_REGISTRY="gcr.io/sre-team-418623" + +# Parse flags +PUSH=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --push) + PUSH=true + shift + ;; + *) + echo "Usage: $0 [--push]" + echo " --push Build and push to ${GCLOUD_REGISTRY}/refinery" + echo " (default) Build locally only" + exit 1 + ;; + esac +done + +VERSION=$(git describe --tags --match='v[0-9]*' --always) +VERSION=${VERSION#v} +GIT_COMMIT=$(git rev-parse HEAD) unset GOOS unset GOARCH export GOFLAGS="-ldflags=-X=main.BuildID=$VERSION" export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-$(make latest_modification_time)} -# Build the image once, either to a remote registry designated by PRIMARY_DOCKER_REPO -# or to the local repository as "ko.local/refinery:" if PRIMARY_DOCKER_REPO is not set. -export KO_DOCKER_REPO="${PRIMARY_DOCKER_REPO:-ko.local}" +# Force IPv4 to avoid IPv6 connectivity issues when pulling base image layers +export GODEBUG=preferIPv4=1 + +if [[ "$PUSH" == "true" ]]; then + export KO_DOCKER_REPO="$GCLOUD_REGISTRY" +else + export KO_DOCKER_REPO="ko.local" +fi -echo "Building image locally with ko for multi-registry push..." # shellcheck disable=SC2086 -IMAGE_REF=$(./ko publish \ - --tags "${TAGS}" \ +IMAGE_REF=$(ko publish \ + --tags "${VERSION}" \ --base-import-paths \ --platform "linux/amd64,linux/arm64" \ - --image-label org.opencontainers.image.source=https://github.com/honeycombio/refinery \ + --image-label org.opencontainers.image.source=https://github.com/khan/refinery \ --image-label org.opencontainers.image.licenses=Apache-2.0 \ --image-label org.opencontainers.image.revision=${GIT_COMMIT} \ ./cmd/refinery) echo "Built image: ${IMAGE_REF}" - -# If COPY_DOCKER_REPOS is set, copy the built image to each of the listed registries. 
-# This is a comma-separated list of registry/repo names, e.g. -# "public.ecr.aws/honeycombio,ghcr.io/honeycombio/refinery" -if [[ -n "${COPY_DOCKER_REPOS:-}" ]]; then - echo "Pushing to multiple registries: ${COPY_DOCKER_REPOS}" - - IFS=',' read -ra REPOS <<< "$COPY_DOCKER_REPOS" - for REPO in "${REPOS[@]}"; do - REPO=$(echo "$REPO" | xargs) # trim whitespace - echo "Tagging and pushing to: $REPO" - - # Tag for each tag in the TAGS list - IFS=',' read -ra TAG_LIST <<< "$TAGS" - for TAG in "${TAG_LIST[@]}"; do - TAG=$(echo "$TAG" | xargs) # trim whitespace - TARGET_IMAGE="$REPO/refinery:$TAG" - echo "Copying $IMAGE_REF to $TARGET_IMAGE" - ./crane copy "$IMAGE_REF" "$TARGET_IMAGE" - done - done -fi diff --git a/cmd/refinery/main.go b/cmd/refinery/main.go index 4f68b6a036..efe4e82110 100644 --- a/cmd/refinery/main.go +++ b/cmd/refinery/main.go @@ -99,11 +99,9 @@ func main() { c, err := config.NewConfig(opts, version) if err != nil { - if configErr, isConfigErr := err.(*config.FileConfigError); isConfigErr && configErr.HasErrors() { - fmt.Printf("%+v\n", err) + fmt.Printf("%+v\n", err) + if c == nil { os.Exit(1) - } else { - fmt.Printf("%+v\n", err) } } if opts.Validate { diff --git a/collect/collect.go b/collect/collect.go index ea430892f1..5975274fe0 100644 --- a/collect/collect.go +++ b/collect/collect.go @@ -113,7 +113,8 @@ type InMemCollector struct { hostname string - memMetricSample []rtmetrics.Sample // Memory monitoring using runtime/metrics + memMetricSample []rtmetrics.Sample // Memory monitoring using runtime/metrics + spanCounters []config.SpanCounter } // These are the names of the metrics we use to track the number of events sent to peers through the router. 
@@ -128,11 +129,14 @@ var inMemCollectorMetrics = []metrics.Metadata{ {Name: "trace_span_count", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans in a trace"}, {Name: "collector_incoming_queue", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans currently in the incoming queue"}, {Name: "collector_peer_queue_length", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of spans in the peer queue"}, + {Name: "collector_peer_queue_capacity", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "configured maximum number of spans in the peer queue"}, {Name: "collector_incoming_queue_length", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of spans in the incoming queue"}, + {Name: "collector_incoming_queue_capacity", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "configured maximum number of spans in the incoming queue"}, {Name: "collector_peer_queue", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of spans currently in the peer queue"}, {Name: "collector_cache_size", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of traces currently stored in the trace cache"}, {Name: "collect_cache_entries", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "Total number of traces currently stored in the cache from all workers"}, {Name: "memory_heap_allocation", Type: metrics.Gauge, Unit: metrics.Bytes, Description: "current heap allocation"}, + {Name: "memory_limit", Type: metrics.Gauge, Unit: metrics.Bytes, Description: "configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage)"}, {Name: "span_received", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans received by the collector"}, {Name: "span_processed", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans 
processed by the collector"}, {Name: "spans_waiting", Type: metrics.UpDown, Unit: metrics.Dimensionless, Description: "number of spans waiting to be processed by the collector"}, @@ -152,6 +156,7 @@ var inMemCollectorMetrics = []metrics.Metadata{ {Name: "dropped_from_stress", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans dropped due to stress relief"}, {Name: "kept_from_stress", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of spans kept due to stress relief"}, + {Name: "events_dropped", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of events dropped"}, {Name: "trace_kept_sample_rate", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "sample rate of kept traces"}, {Name: "trace_aggregate_sample_rate", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "aggregate sample rate of both kept and dropped traces"}, {Name: "collector_collect_loop_duration_ms", Type: metrics.Histogram, Unit: metrics.Milliseconds, Description: "duration of the collect loop, the primary event processing goroutine"}, @@ -159,6 +164,7 @@ var inMemCollectorMetrics = []metrics.Metadata{ {Name: "collector_outgoing_queue", Type: metrics.Histogram, Unit: metrics.Dimensionless, Description: "number of traces waiting to be send to upstream"}, {Name: "collector_cache_eviction", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of times cache eviction has occurred"}, {Name: "collector_num_workers", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "number of collector workers"}, + {Name: "span_counter_id_collision", Type: metrics.Counter, Unit: metrics.Dimensionless, Description: "number of times two spans in the same trace share a span ID while computing scoped SpanCounters"}, } func (i *InMemCollector) Start() error { @@ -171,6 +177,7 @@ func (i *InMemCollector) Start() error { i.Logger.Info().WithField("num_workers", numWorkers).Logf("Starting 
InMemCollector with %d workers", numWorkers) i.StressRelief.UpdateFromConfig() + i.initSpanCounters() // Set queue capacity metrics for stress relief calculations i.Metrics.Store(DENOMINATOR_INCOMING_CAP, float64(imcConfig.IncomingQueueSize)) i.Metrics.Store(DENOMINATOR_PEER_CAP, float64(imcConfig.PeerQueueSize)) @@ -240,6 +247,7 @@ func (i *InMemCollector) reloadConfigs() { i.SamplerFactory.ClearDynsamplers() i.StressRelief.UpdateFromConfig() + i.initSpanCounters() // Send reload signals to all workers to clear their local samplers // so that the new configuration will be propagated @@ -341,6 +349,13 @@ func (i *InMemCollector) monitor() { // Check worker health and report aggregated status i.Health.Ready(collectorHealthKey, i.isReady()) + // Emit queue capacity limits and memory limit so consumers can compute utilization + monitorConfig := i.Config.GetCollectionConfig() + i.Metrics.Gauge("collector_incoming_queue_capacity", float64(monitorConfig.IncomingQueueSize)) + i.Metrics.Gauge("collector_peer_queue_capacity", float64(monitorConfig.PeerQueueSize)) + maxAlloc := monitorConfig.GetMaxAlloc() + i.Metrics.Gauge("memory_limit", float64(maxAlloc)) + // Aggregate metrics totalIncoming := 0 totalPeer := 0 @@ -460,6 +475,7 @@ func (i *InMemCollector) ProcessSpanImmediately(sp *types.Span) (processed bool, if !keep { i.Metrics.Increment("dropped_from_stress") + i.Metrics.Increment("events_dropped") return true, false } @@ -544,6 +560,7 @@ func (i *InMemCollector) dealWithSentTrace(ctx context.Context, tr cache.TraceSe i.Transmission.EnqueueSpan(sp) return } + i.Metrics.Increment("events_dropped") i.Logger.Debug().WithField("trace_id", sp.TraceID).Logf("Dropping span because of previous decision to drop trace") } @@ -600,6 +617,8 @@ func (i *InMemCollector) send(ctx context.Context, trace sendableTrace) { // if we're supposed to drop this trace, and dry run mode is not enabled, then we're done. 
if !trace.KeepSample && !i.Config.GetIsDryRun() { i.Metrics.Increment("trace_send_dropped") + dropCount := int64(trace.DescendantCount()) + i.Metrics.Count("events_dropped", dropCount) i.Logger.Debug().WithFields(logFields).Logf("Dropping trace because of sampling decision") return } @@ -691,6 +710,293 @@ func (i *InMemCollector) addAdditionalAttributes(sp *types.Span) { } } +// initSpanCounters loads and initializes span counters from the current config. +// Must be called at startup and on config reload. +func (i *InMemCollector) initSpanCounters() { + counters := i.Config.GetSpanCounters() + for j := range counters { + if err := counters[j].Init(); err != nil { + i.Logger.Error().WithField("error", err).Logf("failed to initialize span counter %q", counters[j].Key) + } + } + i.mutex.Lock() + i.spanCounters = counters + i.mutex.Unlock() +} + +// findSuitableRootSpan returns the root span of the trace if one is present. +// If no root span has been identified, it falls back to the non-annotation +// span (i.e. not a span event or link) with the earliest timestamp, which is +// the most likely root. Returns nil if no suitable span exists. +func findSuitableRootSpan(t sendableTrace) *types.Span { + if t.RootSpan != nil { + return t.RootSpan + } + var best *types.Span + for _, sp := range t.GetSpans() { + if sp.AnnotationType() != types.SpanAnnotationTypeSpanEvent && + sp.AnnotationType() != types.SpanAnnotationTypeLink { + if best == nil || sp.Timestamp.Before(best.Timestamp) { + best = sp + } + } + } + return best +} + +// customCountWrite is a single counter-keyed write destined for one span. +type customCountWrite struct { + key string + value int64 +} + +// computeCustomCounts computes each configured SpanCounter and returns the +// per-span attribute writes the caller should apply. +// +// Returns nil if there are no counters configured or no spans to count. 
+// +// Fast path: when no counter has ScopeConditions, run a single linear scan +// over the spans and write the trace-wide total to the root span — identical +// to the original behavior, with no index, DFS, or per-span storage. +// +// Scoped path (engaged when at least one counter has ScopeConditions): build a +// parent->children index, run iterative post-order DFS from each forest root +// (and any unvisited orphan island) to compute per-span subtree counts, then +// emit per-anchor writes plus an optional trace-wide total on the root. +// +// Stress relief note: this runs inside sendTraces(), the sole consumer of the +// tracesToSend channel. Work is O(N×M) — N spans × M counters — so large +// traces with many counters slow the consumer, which deepens the outgoing +// queue. The stress relief system monitors queue depth as one of its stress +// inputs, so heavy custom-count configurations can raise the measured stress +// level and trigger earlier activation of stress relief. Additionally, spans +// processed via ProcessSpanImmediately (the stress-relief fast path) bypass the +// trace buffer entirely and never reach sendTraces, so custom counts are not +// computed or attached to stress-sampled traces. 
+func (i *InMemCollector) computeCustomCounts(t sendableTrace) map[*types.Span][]customCountWrite {
+	// Snapshot the counter slice under the read lock; initSpanCounters
+	// replaces it under the write lock.
+	i.mutex.RLock()
+	counters := i.spanCounters
+	i.mutex.RUnlock()
+
+	if len(counters) == 0 {
+		return nil
+	}
+
+	spans := t.GetSpans()
+	if len(spans) == 0 {
+		return nil
+	}
+
+	// rootData stays a nil interface when the trace has no usable root span.
+	rootSpan := findSuitableRootSpan(t)
+	var rootData config.SpanData
+	if rootSpan != nil {
+		rootData = &rootSpan.Data
+	}
+
+	anyScoped := false
+	for _, c := range counters {
+		if len(c.ScopeConditions) > 0 {
+			anyScoped = true
+			break
+		}
+	}
+	if !anyScoped {
+		return computeCustomCountsLinear(spans, counters, rootSpan, rootData)
+	}
+
+	// Memoize the ID fields once so the index build below does not pay for
+	// repeated payload lookups.
+	spanIDFields := i.Config.GetSpanIdFieldNames()
+	parentIDFields := i.Config.GetParentIdFieldNames()
+	memoFields := make([]string, 0, len(spanIDFields)+len(parentIDFields))
+	memoFields = append(memoFields, spanIDFields...)
+	memoFields = append(memoFields, parentIDFields...)
+	for _, sp := range spans {
+		sp.Data.MemoizeFields(memoFields...)
+	}
+
+	childrenByIndex, forestRoots := buildSpanIndex(spans, spanIDFields, parentIDFields, i.Metrics)
+
+	// counts is a flat len(spans) x M matrix: counts[span*M+c] is counter c's
+	// SUBTREE total for that span (the span itself plus its descendants).
+	M := len(counters)
+	counts := make([]int64, len(spans)*M)
+	visited := make([]bool, len(spans))
+
+	for _, ri := range forestRoots {
+		aggregateSubtree(ri, spans, childrenByIndex, counters, rootData, counts, visited, M)
+	}
+	// Anything still unvisited is only reachable through a parent cycle; walk
+	// it too so every span has a value.
+	for idx := range visited {
+		if !visited[idx] {
+			aggregateSubtree(idx, spans, childrenByIndex, counters, rootData, counts, visited, M)
+		}
+	}
+
+	emissions := make(map[*types.Span][]customCountWrite)
+	for c, counter := range counters {
+		scoped := len(counter.ScopeConditions) > 0
+		if scoped {
+			for si, sp := range spans {
+				if counter.MatchesScope(&sp.Data, rootData) {
+					emissions[sp] = append(emissions[sp], customCountWrite{counter.Key, counts[si*M+c]})
+				}
+			}
+		}
+		if !counter.ShouldEmitTotalOnRoot() || rootSpan == nil {
+			continue
+		}
+		var total int64
+		if scoped {
+			// Forest-root subtrees are disjoint, so their subtree totals add
+			// up without double counting.
+			// NOTE(review): spans reachable only through a parent cycle sit
+			// under no forest root and are therefore not part of this sum;
+			// confirm that is intended.
+			for _, ri := range forestRoots {
+				total += counts[ri*M+c]
+			}
+		} else {
+			// counts holds subtree totals, so summing it across every span
+			// would count a matching span once for itself and once more for
+			// each of its ancestors. Count direct matches instead, which is
+			// exactly what the linear fast path reports.
+			for _, sp := range spans {
+				if counter.MatchesSpan(&sp.Data, rootData) {
+					total++
+				}
+			}
+		}
+		emissions[rootSpan] = append(emissions[rootSpan], customCountWrite{counter.EffectiveRootKey(), total})
+	}
+
+	return emissions
+}
+
+// computeCustomCountsLinear implements the unscoped fast path: a single linear
+// pass over spans accumulating one int64 per counter, written to the root.
+// It returns nil when there is no root span to write to.
+func computeCustomCountsLinear(spans []*types.Span, counters []config.SpanCounter, rootSpan *types.Span, rootData config.SpanData) map[*types.Span][]customCountWrite {
+	if rootSpan == nil {
+		return nil
+	}
+	totals := make([]int64, len(counters))
+	for _, sp := range spans {
+		for c, counter := range counters {
+			if counter.MatchesSpan(&sp.Data, rootData) {
+				totals[c]++
+			}
+		}
+	}
+	// NOTE(review): this path writes counter.Key while the scoped path writes
+	// the root total under counter.EffectiveRootKey(); confirm the two agree
+	// for unscoped counters.
+	writes := make([]customCountWrite, 0, len(counters))
+	for c, counter := range counters {
+		writes = append(writes, customCountWrite{counter.Key, totals[c]})
+	}
+	return map[*types.Span][]customCountWrite{rootSpan: writes}
+}
+
+// spanIDFromPayload reads the first present configured ID field and narrows
+// it to a string. Returns ("", false) for missing/empty IDs and for types
+// other than string/[]byte — such spans become leaf-only in the index.
+func spanIDFromPayload(p config.SpanData, fields []string) (string, bool) {
+	for _, f := range fields {
+		if !p.Exists(f) {
+			continue
+		}
+		// Only the first field that exists is consulted; later fields are
+		// never used as a fallback.
+		switch v := p.Get(f).(type) {
+		case string:
+			if v == "" {
+				return "", false
+			}
+			return v, true
+		case []byte:
+			if len(v) == 0 {
+				return "", false
+			}
+			return string(v), true
+		}
+		return "", false
+	}
+	return "", false
+}
+
+// buildSpanIndex constructs the parent->children index used by the scoped
+// aggregation pass. childrenByIndex[i] holds the span indices of span i's
+// children, indexed for O(1) DFS lookups (no string keys in the hot path).
+// forestRoots holds the indices of spans with no parent or whose parent ID
+// is not present in this trace; self-loops are also routed through
+// forestRoots so the cycle-defense pass guarantees visitation. Collisions
+// on span ID are logged via the span_counter_id_collision metric and
+// resolved last-write-wins.
+func buildSpanIndex(spans []*types.Span, spanIDFields, parentIDFields []string, m metrics.Metrics) ([][]int, []int) {
+	idToIndex := make(map[string]int, len(spans))
+	for i, sp := range spans {
+		id, ok := spanIDFromPayload(&sp.Data, spanIDFields)
+		if !ok {
+			continue
+		}
+		if _, exists := idToIndex[id]; exists && m != nil {
+			m.Increment("span_counter_id_collision")
+		}
+		idToIndex[id] = i
+	}
+
+	childrenByIndex := make([][]int, len(spans))
+	var forestRoots []int
+	for i, sp := range spans {
+		parentID, parentOk := spanIDFromPayload(&sp.Data, parentIDFields)
+		if !parentOk {
+			forestRoots = append(forestRoots, i)
+			continue
+		}
+		parentIdx, parentInTrace := idToIndex[parentID]
+		if !parentInTrace || parentIdx == i {
+			forestRoots = append(forestRoots, i)
+			continue
+		}
+		childrenByIndex[parentIdx] = append(childrenByIndex[parentIdx], i)
+	}
+	return childrenByIndex, forestRoots
+}
+
+// aggregateSubtree runs iterative post-order DFS from rootIndex, populating
+// counts[span*M+c] with each counter's subtree count (children's sums plus 1
+// for each counter whose Conditions match the span itself). visited gates
+// re-entry so cycles terminate; M is the per-span counter stride.
+func aggregateSubtree( + rootIndex int, + spans []*types.Span, + childrenByIndex [][]int, + counters []config.SpanCounter, + rootData config.SpanData, + counts []int64, + visited []bool, + M int, +) { + if visited[rootIndex] { + return + } + type frame struct { + spanIndex int + childCursor int + } + stack := []frame{{spanIndex: rootIndex, childCursor: 0}} + visited[rootIndex] = true + + for len(stack) > 0 { + top := &stack[len(stack)-1] + children := childrenByIndex[top.spanIndex] + if top.childCursor < len(children) { + childIdx := children[top.childCursor] + top.childCursor++ + if visited[childIdx] { + continue + } + visited[childIdx] = true + stack = append(stack, frame{spanIndex: childIdx, childCursor: 0}) + continue + } + + sp := spans[top.spanIndex] + base := top.spanIndex * M + for c, counter := range counters { + if counter.MatchesSpan(&sp.Data, rootData) { + counts[base+c] = 1 + } + } + for _, childIdx := range children { + cbase := childIdx * M + for c := range counters { + counts[base+c] += counts[cbase+c] + } + } + stack = stack[:len(stack)-1] + } +} + func (i *InMemCollector) sendTraces() { defer i.sendTracesWG.Done() @@ -698,6 +1004,8 @@ func (i *InMemCollector) sendTraces() { i.Metrics.Histogram("collector_outgoing_queue", float64(len(i.tracesToSend))) _, span := otelutil.StartSpanMulti(context.Background(), i.Tracer, "sendTrace", map[string]interface{}{"num_spans": t.DescendantCount(), "tracesToSend_size": len(i.tracesToSend)}) + customCounts := i.computeCustomCounts(t) + for _, sp := range t.GetSpans() { if i.Config.GetAddRuleReasonToTrace() { @@ -721,6 +1029,10 @@ func (i *InMemCollector) sendTraces() { } } + for _, w := range customCounts[sp] { + sp.Data.Set(w.key, w.value) + } + isDryRun := i.Config.GetIsDryRun() if isDryRun { sp.Data.Set(config.DryRunFieldName, t.shouldSend) diff --git a/collect/collect_test.go b/collect/collect_test.go index ffa97cab8b..42ff43c636 100644 --- a/collect/collect_test.go +++ b/collect/collect_test.go @@ -477,8 
+477,9 @@ func TestDryRunMode(t *testing.T) { transmission := coll.Transmission.(*transmit.MockTransmission) samplerFactory := &sample.SamplerFactory{ - Config: conf, - Logger: &logger.NullLogger{}, + Config: conf, + Logger: &logger.NullLogger{}, + Metrics: &metrics.NullMetrics{}, } sampler := samplerFactory.GetSamplerImplementationForKey("test") coll.SamplerFactory = samplerFactory @@ -1901,6 +1902,914 @@ func TestWorkerHealthReporting(t *testing.T) { }, 2*time.Second, 50*time.Millisecond, "InMemCollector should be healthy again after worker resumes") } +// customCountConf returns a base MockConfig suitable for custom span count tests. +func customCountConf(counters []config.SpanCounter) *config.MockConfig { + return &config.MockConfig{ + GetTracesConfigVal: config.TracesConfig{ + SendTicker: config.Duration(2 * time.Millisecond), + SendDelay: config.Duration(1 * time.Millisecond), + TraceTimeout: config.Duration(60 * time.Second), + MaxBatchSize: 500, + }, + SampleCache: config.SampleCacheConfig{ + KeptSize: 100, + DroppedSize: 100, + SizeCheckInterval: config.Duration(1 * time.Second), + }, + GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, + TraceIdFieldNames: []string{"trace.trace_id", "traceId"}, + ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, + SpanIdFieldNames: []string{"trace.span_id", "spanId"}, + GetCollectionConfigVal: config.CollectionConfig{ + WorkerCount: 2, + ShutdownDelay: config.Duration(1 * time.Millisecond), + IncomingQueueSize: 1000, + PeerQueueSize: 1000, + }, + SpanCounters: counters, + } +} + +// TestCustomSpanCounts_NoCounters verifies that when no counters are configured +// no custom fields are added to any span. 
+func TestCustomSpanCounts_NoCounters(t *testing.T) {
+	coll := newTestCollector(t, customCountConf(nil))
+	transmission := coll.Transmission.(*transmit.MockTransmission)
+
+	// One child plus the root, built with the file's shared span helpers.
+	traceID := "no-counters"
+	addPeerSpan(t, coll, traceID, map[string]any{"trace.parent_id": "x"})
+	addRootSpan(t, coll, traceID, nil)
+
+	events := transmission.GetBlock(2)
+	for _, ev := range events {
+		assert.Nil(t, ev.Data.Get("my.count"), "no custom count fields should be set when no counters are configured")
+	}
+}
+
+// TestCustomSpanCounts_CountsLandOnRoot verifies that a counter with no
+// conditions counts all spans and attaches the result to the root span only.
+func TestCustomSpanCounts_CountsLandOnRoot(t *testing.T) {
+	counters := []config.SpanCounter{
+		{Key: "all_spans"},
+	}
+	coll := newTestCollector(t, customCountConf(counters))
+	transmission := coll.Transmission.(*transmit.MockTransmission)
+
+	traceID := "root-target"
+	for i := 0; i < 3; i++ {
+		addPeerSpan(t, coll, traceID, map[string]any{"trace.parent_id": "x"})
+	}
+	addRootSpan(t, coll, traceID, nil)
+
+	events := transmission.GetBlock(4)
+	require.Equal(t, 4, len(events))
+
+	// The root is the only event without a parent ID.
+	var rootEvent *types.Event
+	var childEvents []*types.Event
+	for _, ev := range events {
+		if ev.Data.Get("trace.parent_id") == nil {
+			rootEvent = ev
+		} else {
+			childEvents = append(childEvents, ev)
+		}
+	}
+
+	require.NotNil(t, rootEvent)
+	// all 4 spans counted (3 children + root)
+	assert.Equal(t, int64(4), rootEvent.Data.Get("all_spans"))
+	for _, child := range childEvents {
+		assert.Nil(t, child.Data.Get("all_spans"), "custom count should not be set on child spans")
+	}
+}
+
+// TestCustomSpanCounts_ConditionalCounting verifies that only spans matching
+// a condition are counted.
+func TestCustomSpanCounts_ConditionalCounting(t *testing.T) {
+	counters := []config.SpanCounter{
+		{
+			Key: "error_spans",
+			Conditions: []*config.RulesBasedSamplerCondition{
+				{Field: "error", Operator: config.EQ, Value: true},
+			},
+		},
+	}
+	coll := newTestCollector(t, customCountConf(counters))
+	transmission := coll.Transmission.(*transmit.MockTransmission)
+
+	traceID := "conditional"
+	// 2 error spans
+	for i := 0; i < 2; i++ {
+		addPeerSpan(t, coll, traceID, map[string]any{"trace.parent_id": "x", "error": true})
+	}
+	// 2 non-error spans
+	for i := 0; i < 2; i++ {
+		addPeerSpan(t, coll, traceID, map[string]any{"trace.parent_id": "x"})
+	}
+	addRootSpan(t, coll, traceID, nil)
+
+	events := transmission.GetBlock(5)
+	require.Equal(t, 5, len(events))
+
+	var rootEvent *types.Event
+	for _, ev := range events {
+		if ev.Data.Get("trace.parent_id") == nil {
+			rootEvent = ev
+		}
+	}
+	require.NotNil(t, rootEvent)
+	assert.Equal(t, int64(2), rootEvent.Data.Get("error_spans"))
+}
+
+// TestCustomSpanCounts_MultipleCounters verifies that multiple counters with
+// different conditions produce independent counts on the root span.
+func TestCustomSpanCounts_MultipleCounters(t *testing.T) {
+	counters := []config.SpanCounter{
+		{
+			Key: "db_spans",
+			Conditions: []*config.RulesBasedSamplerCondition{
+				{Field: "db.system", Operator: config.Exists},
+			},
+		},
+		{
+			Key: "error_spans",
+			Conditions: []*config.RulesBasedSamplerCondition{
+				{Field: "error", Operator: config.EQ, Value: true},
+			},
+		},
+	}
+	coll := newTestCollector(t, customCountConf(counters))
+	transmission := coll.Transmission.(*transmit.MockTransmission)
+
+	traceID := "multi-counter"
+	// Two spans carry db.system and two carry error=true; one span has both,
+	// so the two counters overlap without being equal sets.
+	spans := []map[string]any{
+		{"trace.parent_id": "x", "db.system": "postgresql"},
+		{"trace.parent_id": "x", "db.system": "postgresql", "error": true},
+		{"trace.parent_id": "x", "error": true},
+		{"trace.parent_id": "x"},
+	}
+	for _, data := range spans {
+		addPeerSpan(t, coll, traceID, data)
+	}
+	addRootSpan(t, coll, traceID, nil)
+
+	events := transmission.GetBlock(5)
+	require.Equal(t, 5, len(events))
+
+	var rootEvent *types.Event
+	for _, ev := range events {
+		if ev.Data.Get("trace.parent_id") == nil {
+			rootEvent = ev
+		}
+	}
+	require.NotNil(t, rootEvent)
+	assert.Equal(t, int64(2), rootEvent.Data.Get("db_spans"), "2 spans have db.system")
+	assert.Equal(t, int64(2), rootEvent.Data.Get("error_spans"), "2 spans have error=true")
+}
+
+// TestCustomSpanCounts_NoRootSpan verifies that when a trace times out without
+// a root span, custom counts land on the first non-annotation span instead.
+func TestCustomSpanCounts_NoRootSpan(t *testing.T) {
+	conf := customCountConf([]config.SpanCounter{{Key: "all_spans"}})
+	// Short timeout so the trace is flushed without ever seeing a root span.
+	conf.GetTracesConfigVal.TraceTimeout = config.Duration(5 * time.Millisecond)
+
+	coll := newTestCollector(t, conf)
+	transmission := coll.Transmission.(*transmit.MockTransmission)
+
+	traceID := "no-root"
+	// annotation span: should not be the target
+	annotation := types.NewPayload(coll.Config, map[string]interface{}{"trace.parent_id": "x"})
+	annotation.MetaAnnotationType = "span_event"
+	coll.AddSpanFromPeer(&types.Span{
+		TraceID: traceID,
+		Event: &types.Event{
+			Dataset: "test",
+			Data:    annotation,
+			APIKey:  legacyAPIKey,
+		},
+	})
+	// regular span: should be the target
+	addPeerSpan(t, coll, traceID, map[string]any{"trace.parent_id": "x"})
+
+	events := transmission.GetBlock(2)
+	require.Equal(t, 2, len(events))
+
+	// Exactly one span should carry the custom count (the first real span).
+	var counted []*types.Event
+	for _, ev := range events {
+		if ev.Data.Get("all_spans") != nil {
+			counted = append(counted, ev)
+		}
+	}
+	require.Equal(t, 1, len(counted), "custom count should appear on exactly one span when there is no root")
+	assert.Equal(t, int64(2), counted[0].Data.Get("all_spans"), "both spans should be counted")
+}
+
+// addPeerSpan is a tiny helper for the scoped SpanCounter tests: it constructs
+// a non-root span with the given data and pushes it via AddSpanFromPeer.
+func addPeerSpan(t *testing.T, coll *InMemCollector, traceID string, data map[string]any) { + t.Helper() + coll.AddSpanFromPeer(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, data), + APIKey: legacyAPIKey, + }, + }) +} + +// addRootSpan is a tiny helper for the scoped SpanCounter tests: it constructs +// a root span with the given data and pushes it via AddSpan. +func addRootSpan(t *testing.T, coll *InMemCollector, traceID string, data map[string]any) { + t.Helper() + coll.AddSpan(&types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{ + Dataset: "test", + Data: types.NewPayload(coll.Config, data), + APIKey: legacyAPIKey, + }, + }) +} + +// findEventBySpanID returns the first event whose span ID matches. +func findEventBySpanID(events []*types.Event, id string) *types.Event { + for _, ev := range events { + if ev.Data.Get("trace.span_id") == id { + return ev + } + } + return nil +} + +// TestCustomSpanCounts_Scoped_MultipleAnchors verifies that a single +// ScopeConditions-equipped counter writes per-anchor subtree counts. 
+// +// Trace shape (5 resolver anchors, each with 2 db.query descendants): +// +// root (s0) +// ├── r1 ── db1a, db1b +// ├── r2 ── db2a, db2b +// ├── r3 ── db3a, db3b +// ├── r4 ── db4a, db4b +// └── r5 ── db5a, db5b +func TestCustomSpanCounts_Scoped_MultipleAnchors(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db_call_count", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "scoped-many-anchors" + + for r := 1; r <= 5; r++ { + resolverID := fmt.Sprintf("r%d", r) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": resolverID, + "trace.parent_id": "s0", + "graphql.operation.name": fmt.Sprintf("Query%d", r), + }) + for d := 0; d < 2; d++ { + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": fmt.Sprintf("db%d%d", r, d), + "trace.parent_id": resolverID, + "name": "db.query", + }) + } + } + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(16) + require.Equal(t, 16, len(events)) + + for r := 1; r <= 5; r++ { + ev := findEventBySpanID(events, fmt.Sprintf("r%d", r)) + require.NotNil(t, ev, "resolver r%d missing", r) + assert.Equal(t, int64(2), ev.Data.Get("db_call_count"), "resolver r%d", r) + } + + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Nil(t, root.Data.Get("db_call_count"), "no RootKey set → no root write") + for r := 1; r <= 5; r++ { + for d := 0; d < 2; d++ { + ev := findEventBySpanID(events, fmt.Sprintf("db%d%d", r, d)) + require.NotNil(t, ev) + assert.Nil(t, ev.Data.Get("db_call_count"), "leaf spans should not be written to") + } + } +} + +// 
TestCustomSpanCounts_Scoped_RootTotalViaRootKey verifies that setting +// RootKey alongside ScopeConditions causes the root to receive the +// trace-wide total under RootKey, while anchors still get Key. +func TestCustomSpanCounts_Scoped_RootTotalViaRootKey(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db_call_count", + RootKey: "db_call_total", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "scoped-with-total" + for r := 1; r <= 3; r++ { + resolverID := fmt.Sprintf("r%d", r) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": resolverID, + "trace.parent_id": "s0", + "graphql.operation.name": fmt.Sprintf("Query%d", r), + }) + for d := 0; d < 4; d++ { + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": fmt.Sprintf("db%d%d", r, d), + "trace.parent_id": resolverID, + "name": "db.query", + }) + } + } + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(16) + require.Equal(t, 16, len(events)) + + for r := 1; r <= 3; r++ { + ev := findEventBySpanID(events, fmt.Sprintf("r%d", r)) + require.NotNil(t, ev) + assert.Equal(t, int64(4), ev.Data.Get("db_call_count")) + assert.Nil(t, ev.Data.Get("db_call_total")) + } + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(12), root.Data.Get("db_call_total"), "root should get trace-wide total under RootKey") + assert.Nil(t, root.Data.Get("db_call_count"), "root does not receive Key when RootKey is set") +} + +// TestCustomSpanCounts_Scoped_RootKey verifies that when both +// ScopeConditions and RootKey are set, the per-anchor writes use Key and +// the root's trace-wide 
total uses RootKey — landing on a separate field +// so the two counts can be queried independently. +func TestCustomSpanCounts_Scoped_RootKey(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "resolver_db_count", + RootKey: "trace_db_count", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "rootkey" + for r := 1; r <= 2; r++ { + resolverID := fmt.Sprintf("r%d", r) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": resolverID, + "trace.parent_id": "s0", + "graphql.operation.name": fmt.Sprintf("Query%d", r), + }) + for d := 0; d < 3; d++ { + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": fmt.Sprintf("db%d%d", r, d), + "trace.parent_id": resolverID, + "name": "db.query", + }) + } + } + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(9) + require.Equal(t, 9, len(events)) + + for r := 1; r <= 2; r++ { + ev := findEventBySpanID(events, fmt.Sprintf("r%d", r)) + require.NotNil(t, ev) + assert.Equal(t, int64(3), ev.Data.Get("resolver_db_count"), "anchor gets Key") + assert.Nil(t, ev.Data.Get("trace_db_count"), "anchor does not get RootKey") + } + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(6), root.Data.Get("trace_db_count"), "root gets RootKey for total") + assert.Nil(t, root.Data.Get("resolver_db_count"), "root does not get Key when RootKey overrides") +} + +// TestCustomSpanCounts_Unscoped_RootKeyIgnored verifies that RootKey on an +// unscoped counter (no ScopeConditions) is ignored — the root still gets +// Key, preserving today's behavior. 
+func TestCustomSpanCounts_Unscoped_RootKeyIgnored(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "k", + RootKey: "rk", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "rootkey-ignored" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c1", + "trace.parent_id": "s0", + "name": "db.query", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(2) + require.Equal(t, 2, len(events)) + + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(1), root.Data.Get("k"), "unscoped counter writes Key to root") + assert.Nil(t, root.Data.Get("rk"), "RootKey is ignored when ScopeConditions is empty") +} + +// TestCustomSpanCounts_Scoped_NestedAnchors verifies that an outer anchor's +// count includes the inner anchor's subtree (no special-casing of nested +// anchors). 
+// +// root (s0) +// └── outer (anchor) ── db_outer +//     └── inner (anchor) ── db_inner1, db_inner2 +func TestCustomSpanCounts_Scoped_NestedAnchors(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db_calls", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "anchor", Operator: config.EQ, Value: true}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "nested" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "outer", + "trace.parent_id": "s0", + "anchor": true, + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "db_outer", + "trace.parent_id": "outer", + "name": "db.query", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "inner", + "trace.parent_id": "outer", + "anchor": true, + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "db_inner1", + "trace.parent_id": "inner", + "name": "db.query", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "db_inner2", + "trace.parent_id": "inner", + "name": "db.query", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(6) + require.Equal(t, 6, len(events)) + + outer := findEventBySpanID(events, "outer") + inner := findEventBySpanID(events, "inner") + require.NotNil(t, outer) + require.NotNil(t, inner) + assert.Equal(t, int64(3), outer.Data.Get("db_calls"), "outer subtree: db_outer + db_inner1 + db_inner2") + assert.Equal(t, int64(2), inner.Data.Get("db_calls"), "inner subtree: db_inner1 + db_inner2") +} + +// TestCustomSpanCounts_Scoped_AnchorMatchesRoot verifies the root span can +// itself be an anchor, in which case the count appears on it once. 
+func TestCustomSpanCounts_Scoped_AnchorMatchesRoot(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "all", + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "kind", Operator: config.EQ, Value: "server"}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "anchor-is-root" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c1", + "trace.parent_id": "s0", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c2", + "trace.parent_id": "s0", + }) + addRootSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "s0", + "kind": "server", + }) + + events := transmission.GetBlock(3) + require.Equal(t, 3, len(events)) + + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(3), root.Data.Get("all"), "anchor-as-root counts whole subtree") + for _, id := range []string{"c1", "c2"} { + ev := findEventBySpanID(events, id) + require.NotNil(t, ev) + assert.Nil(t, ev.Data.Get("all")) + } +} + +// TestCustomSpanCounts_Scoped_AnchorMatchesNothing verifies that when no span +// matches ScopeConditions, no anchor writes occur. RootKey is set so the root +// still receives the trace-wide total. 
+func TestCustomSpanCounts_Scoped_AnchorMatchesNothing(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "errs", + RootKey: "err_total", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "error", Operator: config.EQ, Value: true}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "no-such-anchor", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "anchor-zero" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c1", + "trace.parent_id": "s0", + "error": true, + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c2", + "trace.parent_id": "s0", + "error": true, + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(3) + require.Equal(t, 3, len(events)) + + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(2), root.Data.Get("err_total"), "RootKey set → root has trace-wide total") + assert.Nil(t, root.Data.Get("errs"), "anchor Key not written to root") +} + +// TestCustomSpanCounts_Scoped_AnchorMatchesEverySpan verifies a permissive +// scope (everything is an anchor) — every span receives its own subtree count. 
+func TestCustomSpanCounts_Scoped_AnchorMatchesEverySpan(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "subtree", + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "trace.span_id", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "all-anchors" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c1", + "trace.parent_id": "s0", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c2", + "trace.parent_id": "c1", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(3) + require.Equal(t, 3, len(events)) + + assert.Equal(t, int64(3), findEventBySpanID(events, "s0").Data.Get("subtree")) + assert.Equal(t, int64(2), findEventBySpanID(events, "c1").Data.Get("subtree")) + assert.Equal(t, int64(1), findEventBySpanID(events, "c2").Data.Get("subtree")) +} + +// TestCustomSpanCounts_Scoped_MultiForestEmitTotal verifies that a trace with +// two forest roots (a missing intermediate span — e.g., a load balancer not +// in Refinery's view) produces a correct trace-wide total when RootKey is +// set. The total sums each forest's subtree counts. +func TestCustomSpanCounts_Scoped_MultiForestEmitTotal(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db_call_count", + RootKey: "db_call_total", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "multi-forest" + // Forest A: parent points to a missing "missing-lb" span. 
+ addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "a1", + "trace.parent_id": "missing-lb", + "graphql.operation.name": "QueryA", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "a_db1", + "trace.parent_id": "a1", + "name": "db.query", + }) + // Forest B (with the root span the chooser will pick). + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "b1", + "trace.parent_id": "s0", + "graphql.operation.name": "QueryB", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "b_db1", + "trace.parent_id": "b1", + "name": "db.query", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "b_db2", + "trace.parent_id": "b1", + "name": "db.query", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(6) + require.Equal(t, 6, len(events)) + + assert.Equal(t, int64(1), findEventBySpanID(events, "a1").Data.Get("db_call_count")) + assert.Equal(t, int64(2), findEventBySpanID(events, "b1").Data.Get("db_call_count")) + assert.Equal(t, int64(3), findEventBySpanID(events, "s0").Data.Get("db_call_total"), + "trace-wide total must sum across both forest roots") +} + +// TestCustomSpanCounts_Scoped_MultiForestNoTotal verifies that with no +// RootKey set, anchors in disjoint forests still get correct per-anchor +// counts and no root write happens. 
+func TestCustomSpanCounts_Scoped_MultiForestNoTotal(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db_call_count", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "multi-forest-no-total" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "a1", + "trace.parent_id": "missing-lb", + "graphql.operation.name": "QueryA", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "a_db", + "trace.parent_id": "a1", + "name": "db.query", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "b1", + "trace.parent_id": "s0", + "graphql.operation.name": "QueryB", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "b_db", + "trace.parent_id": "b1", + "name": "db.query", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(5) + require.Equal(t, 5, len(events)) + + assert.Equal(t, int64(1), findEventBySpanID(events, "a1").Data.Get("db_call_count")) + assert.Equal(t, int64(1), findEventBySpanID(events, "b1").Data.Get("db_call_count")) + assert.Nil(t, findEventBySpanID(events, "s0").Data.Get("db_call_count"), + "no RootKey set → no root write") +} + +// TestCustomSpanCounts_Scoped_TwoCycleDefense verifies that a parent-ID cycle +// (X.parent=Y, Y.parent=X), neither a forest root, does not cause an infinite +// loop and that both spans get a count via the unvisited-island pass. 
+func TestCustomSpanCounts_Scoped_TwoCycleDefense(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "self", + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "trace.span_id", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "two-cycle" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "x", + "trace.parent_id": "y", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "y", + "trace.parent_id": "x", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(3) + require.Equal(t, 3, len(events)) + + // Both cycle members are visited and each has a count, even though they + // are not reachable from the forest root. + x := findEventBySpanID(events, "x") + y := findEventBySpanID(events, "y") + require.NotNil(t, x) + require.NotNil(t, y) + // Each of x and y must have a count of at least 1 — the unvisited-island pass + // picks a starting node and treats the cycle as its own tree. + xCount, _ := x.Data.Get("self").(int64) + yCount, _ := y.Data.Get("self").(int64) + assert.GreaterOrEqual(t, xCount, int64(1)) + assert.GreaterOrEqual(t, yCount, int64(1)) +} + +// TestCustomSpanCounts_Scoped_SelfLoopDefense verifies that a span whose +// parent ID equals its own span ID is treated as a forest root and counted +// once (its own contribution). 
+func TestCustomSpanCounts_Scoped_SelfLoopDefense(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "self", + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "trace.span_id", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "self-loop" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "loopy", + "trace.parent_id": "loopy", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(2) + require.Equal(t, 2, len(events)) + assert.Equal(t, int64(1), findEventBySpanID(events, "loopy").Data.Get("self")) +} + +// TestCustomSpanCounts_Scoped_SpanIDCollision verifies that two spans with +// the same span ID don't panic and the span_counter_id_collision metric is +// incremented. +func TestCustomSpanCounts_Scoped_SpanIDCollision(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "subtree", + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "trace.span_id", Operator: config.Exists}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + m := coll.Metrics.(*metrics.MockMetrics) + + traceID := "id-collision" + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "dup", + "trace.parent_id": "s0", + }) + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "dup", // same ID as the previous one + "trace.parent_id": "s0", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(3) + require.Equal(t, 3, len(events)) + assert.GreaterOrEqual(t, m.CounterIncrements["span_counter_id_collision"], int64(1), + "collision metric should be incremented at least once") +} + +// TestCustomSpanCounts_BackwardsCompat_FastPath verifies that with no +// ScopeConditions configured the fast path emits 
the same trace-wide total +// on the root span as before — bit-for-bit identical to the original +// behavior. +func TestCustomSpanCounts_BackwardsCompat_FastPath(t *testing.T) { + counters := []config.SpanCounter{{ + Key: "db", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + }} + coll := newTestCollector(t, customCountConf(counters)) + transmission := coll.Transmission.(*transmit.MockTransmission) + + traceID := "backwards-compat" + for i := 0; i < 3; i++ { + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": fmt.Sprintf("c%d", i), + "trace.parent_id": "s0", + "name": "db.query", + }) + } + addPeerSpan(t, coll, traceID, map[string]any{ + "trace.span_id": "c3", + "trace.parent_id": "s0", + }) + addRootSpan(t, coll, traceID, map[string]any{"trace.span_id": "s0"}) + + events := transmission.GetBlock(5) + require.Equal(t, 5, len(events)) + + root := findEventBySpanID(events, "s0") + require.NotNil(t, root) + assert.Equal(t, int64(3), root.Data.Get("db")) + for _, id := range []string{"c0", "c1", "c2", "c3"} { + ev := findEventBySpanID(events, id) + require.NotNil(t, ev) + assert.Nil(t, ev.Data.Get("db"), "child span should not carry the counter on the fast path") + } +} + // BenchmarkCollectorWithSamplers runs benchmarks for different sampler configurations. // This is a tricky benchmark to interpret because just setting up the input data // can easily be more expensive than the collector's routing code. The goal is to @@ -2218,3 +3127,148 @@ func (c *mockSender) waitForCount(target int) { } } } + +// makeBenchmarkTrace builds a synthetic trace of totalSpans spans shaped like a +// typical resolver-heavy graphql workload: 1 root, `anchorCount` resolver spans +// that satisfy the scoped tests' ScopeConditions, and the remainder distributed +// round-robin as "db.query" children under the resolvers. 
+// +// The Refinery payload is constructed via types.NewPayload so MemoizeFields +// behaves the same as in production. +func makeBenchmarkTrace(cfg config.Config, totalSpans, anchorCount int) *types.Trace { + traceID := "bench" + trace := &types.Trace{ + TraceID: traceID, + Dataset: "bench", + APIKey: legacyAPIKey, + ArrivalTime: time.Now(), + } + + root := &types.Span{ + TraceID: traceID, + IsRoot: true, + Event: &types.Event{ + Dataset: "bench", + Data: types.NewPayload(cfg, map[string]any{ + "trace.span_id": "s0", + }), + APIKey: legacyAPIKey, + }, + } + trace.AddSpan(root) + trace.RootSpan = root + + anchors := make([]string, 0, anchorCount) + for a := 0; a < anchorCount; a++ { + anchorID := fmt.Sprintf("a%d", a) + anchors = append(anchors, anchorID) + trace.AddSpan(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "bench", + Data: types.NewPayload(cfg, map[string]any{ + "trace.span_id": anchorID, + "trace.parent_id": "s0", + "graphql.operation.name": fmt.Sprintf("Query%d", a), + }), + APIKey: legacyAPIKey, + }, + }) + } + + added := 1 + anchorCount + i := 0 + for added < totalSpans { + anchorID := anchors[i%len(anchors)] + trace.AddSpan(&types.Span{ + TraceID: traceID, + Event: &types.Event{ + Dataset: "bench", + Data: types.NewPayload(cfg, map[string]any{ + "trace.span_id": fmt.Sprintf("d%d", added), + "trace.parent_id": anchorID, + "name": "db.query", + }), + APIKey: legacyAPIKey, + }, + }) + added++ + i++ + } + return trace +} + +// makeBenchmarkCollector constructs a minimal InMemCollector bypassing the +// usual Start() machinery — we only need the fields touched by +// computeCustomCounts. Counters are initialized in place. 
+func makeBenchmarkCollector(b *testing.B, counters []config.SpanCounter) *InMemCollector { + for j := range counters { + require.NoError(b, counters[j].Init()) + } + conf := customCountConf(counters) + m := &metrics.MockMetrics{} + m.Start() + c := &InMemCollector{ + Config: conf, + Metrics: m, + spanCounters: counters, + } + return c +} + +func benchmarkComputeCustomCounts(b *testing.B, counters []config.SpanCounter) { + c := makeBenchmarkCollector(b, counters) + trace := makeBenchmarkTrace(c.Config, 2868, 5) + st := sendableTrace{Trace: trace} + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = c.computeCustomCounts(st) + } +} + +// BenchmarkComputeCustomCounts_NoScope exercises the fast path: a single +// unscoped counter on a 2,868-span trace. Should track today's +// implementation's cost (no DFS, no index). +func BenchmarkComputeCustomCounts_NoScope(b *testing.B) { + counters := []config.SpanCounter{{ + Key: "db", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + }} + benchmarkComputeCustomCounts(b, counters) +} + +// BenchmarkComputeCustomCounts_Scoped exercises the scoped path: one scoped +// counter with 5 anchors on a 2,868-span trace. +func BenchmarkComputeCustomCounts_Scoped(b *testing.B) { + counters := []config.SpanCounter{{ + Key: "db", + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + }} + benchmarkComputeCustomCounts(b, counters) +} + +// BenchmarkComputeCustomCounts_ScopedMulti exercises the per-counter loop in +// the write pass: 5 scoped counters, each with 5 anchors, on a 2,868-span +// trace. 
+func BenchmarkComputeCustomCounts_ScopedMulti(b *testing.B) { + counters := make([]config.SpanCounter, 5) + for i := range counters { + counters[i] = config.SpanCounter{ + Key: fmt.Sprintf("k%d", i), + Conditions: []*config.RulesBasedSamplerCondition{ + {Field: "name", Operator: config.EQ, Value: "db.query"}, + }, + ScopeConditions: []*config.RulesBasedSamplerCondition{ + {Field: "graphql.operation.name", Operator: config.Exists}, + }, + } + } + benchmarkComputeCustomCounts(b, counters) +} diff --git a/collect/multi_loop_test.go b/collect/multi_loop_test.go index 85e184cf88..956055aa9a 100644 --- a/collect/multi_loop_test.go +++ b/collect/multi_loop_test.go @@ -534,7 +534,7 @@ func TestCoordinatedReload(t *testing.T) { PeerQueueSize: 3000, WorkerCount: 4, }, - GetSamplerTypeVal: &config.DeterministicSamplerConfig{SampleRate: 1}, + GetSamplerTypeVal: &config.DynamicSamplerConfig{SampleRate: 1, FieldList: []string{"test"}}, ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, TraceIdFieldNames: []string{"trace.trace_id", "traceId"}, SampleCache: config.SampleCacheConfig{ @@ -546,92 +546,71 @@ func TestCoordinatedReload(t *testing.T) { collector := newTestCollector(t, conf) - // Send some test spans to create dataset samplers - processedInitial := int32(0) - for i := 0; i < 10; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: fmt.Sprintf("dataset-%d", i%3), - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("reload-trace-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedInitial, 1) - } + // waitForSamplersCreated waits until at least one worker has a sampler, + // proving traces were actually processed and makeDecision was called. 
+ waitForSamplersCreated := func(msg string) { + t.Helper() + assert.Eventually(t, func() bool { + total := 0 + for _, worker := range collector.workers { + ch := make(chan struct{}) + worker.pause <- ch + total += len(worker.datasetSamplers) + close(ch) + } + return total > 0 + }, 2*time.Second, 10*time.Millisecond, msg) } - // Wait for initial spans to be processed - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedInitial) >= 8 - }, 2*time.Second, 10*time.Millisecond, "Initial spans should be processed") - - // Trigger a reload - this should cause workers to recreate their samplers - collector.sendReloadSignal("hash1", "hash2") - - // Give a moment for the reload signal to be processed (reload is async) - // We'll verify the reload worked by checking that spans still get processed - time.Sleep(50 * time.Millisecond) - - // Check that samplers were recreated by sending more spans - processedAfterReload := int32(0) - for i := 0; i < 20; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: "test.reload", - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("after-reload-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedAfterReload, 1) - } + // waitForSamplersCleared waits until all workers have empty datasetSamplers. 
+ waitForSamplersCleared := func(msg string) { + t.Helper() + assert.Eventually(t, func() bool { + for _, worker := range collector.workers { + ch := make(chan struct{}) + worker.pause <- ch + n := len(worker.datasetSamplers) + close(ch) + if n > 0 { + return false + } + } + return true + }, 2*time.Second, 10*time.Millisecond, msg) } - // Verify spans were processed after reload - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedAfterReload) >= 15 - }, 2*time.Second, 100*time.Millisecond, "Spans should be processed after reload") - - // Trigger another reload to verify multiple reloads work - collector.sendReloadSignal("hash2", "hash3") - time.Sleep(50 * time.Millisecond) - - // Send more spans to verify system still works - processedAfterSecondReload := int32(0) - for i := 0; i < 20; i++ { - span := &types.Span{ - Event: &types.Event{ - APIHost: "http://api.honeycomb.io", - APIKey: legacyAPIKey, - Dataset: "test.reload2", - SampleRate: 1, - Timestamp: time.Now(), - Data: types.Payload{}, - }, - TraceID: fmt.Sprintf("after-second-reload-%d", i), - IsRoot: true, - ArrivalTime: time.Now(), - } - if err := collector.AddSpan(span); err == nil { - atomic.AddInt32(&processedAfterSecondReload, 1) + sendSpans := func(n int, dataset, traceIDPrefix string) { + for i := 0; i < n; i++ { + span := &types.Span{ + Event: &types.Event{ + APIHost: "http://api.honeycomb.io", + APIKey: legacyAPIKey, + Dataset: dataset, + SampleRate: 1, + Timestamp: time.Now(), + Data: types.Payload{}, + }, + TraceID: fmt.Sprintf("%s-%d", traceIDPrefix, i), + IsRoot: true, + ArrivalTime: time.Now(), + } + collector.AddSpan(span) //nolint:errcheck } } - // Verify spans were processed after second reload - assert.Eventually(t, func() bool { - return atomic.LoadInt32(&processedAfterSecondReload) >= 15 - }, 2*time.Second, 100*time.Millisecond, "Spans should be processed after second reload") + // Send spans and wait for workers to process them and create samplers. 
+ sendSpans(20, "dataset", "reload-trace") + waitForSamplersCreated("samplers should be created before first reload") + + // Reload and verify all workers clear their samplers. + collector.sendReloadSignal("dataset", "hash2") + waitForSamplersCleared("samplers should be cleared after first reload") + + // Send spans again; samplers must be recreated, proving the system still works. + sendSpans(20, "dataset", "after-reload") + waitForSamplersCreated("samplers should be recreated after first reload") + + // Second reload cycle. + collector.sendReloadSignal("dataset", "hash3") + waitForSamplersCleared("samplers should be cleared after second reload") } diff --git a/config.md b/config.md index fb1b4ca340..57796cfc62 100644 --- a/config.md +++ b/config.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Configuration Documentation This is the documentation for the configuration file for Honeycomb's Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-06-01 at 19:01:21 UTC. ## The Config file @@ -181,16 +181,31 @@ ReceiveKeys is a set of Honeycomb API keys that the proxy will treat specially. This list only applies to span traffic - other Honeycomb API actions will be proxied through to the upstream API directly without modifying keys. -- Not eligible for live reload. +- Eligible for live reload. - Type: `stringarray` - Example: `your-key-goes-here` +### `ReceiveKeyIDs` + +ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. + +When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose Honeycomb ingest key ID matches an entry in this list will be accepted. +The key ID is the `id` field returned by the Honeycomb `/1/auth` endpoint; it is distinct from the full API key value. +This allows authorization based on key IDs rather than full key values, which avoids storing secret key material in the configuration file. +Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. 
+Note: This feature does not support legacy API keys. +Only Honeycomb Ingest Keys (which have a key ID) are compatible with this setting. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `your-key-id-goes-here` + ### `AcceptOnlyListedKeys` AcceptOnlyListedKeys is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. -If `true`, then only traffic using the keys listed in `ReceiveKeys` is accepted. -Events arriving with API keys not in the `ReceiveKeys` list will be rejected with an HTTP `401` error. +If `true`, then only traffic using the keys listed in `ReceiveKeys` or whose key ID is listed in `ReceiveKeyIDs` is accepted. +Events arriving with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. This setting is applied **before** the `SendKey` and `SendKeyMode` settings. @@ -660,6 +675,22 @@ In rare circumstances, compression costs may outweigh the benefits, in which cas - Default: `gzip` - Options: `none`, `gzip` +### `AdditionalAttributes` + +AdditionalAttributes adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + +This is useful for injecting deployment-specific metadata (such as a cluster ID or environment name) into metrics so they can be filtered or grouped in the metrics backend. +Both keys and values must be strings. +When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. +When supplying via the command line, the value should be a key value pair. +If multiple key-value pairs are needed, each should be supplied via its own command line flag. +The key-value pairs must use ':' as the separator. + +- Not eligible for live reload. 
+- Type: `map` +- Example: `pipeline.id:'12345',rollout.id:'67890'` +- Environment variable: `REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES` + ## OpenTelemetry Tracing `OTelTracing` contains configuration for Refinery's own tracing. @@ -1068,6 +1099,17 @@ A trace without a `parent_id` is assumed to be a root span. - Type: `stringarray` - Example: `trace.parent_id,parentId` +### `SpanNames` + +SpanNames is the list of field names to use for the span ID. + +The first field in the list that is present on a span will be used as that span's ID. +This is required for `SpanCounters` entries that set `ScopeConditions` (per-anchor subtree counting), which must resolve each span's parent ID to a span ID in the same trace. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `trace.span_id,spanId` + ## gRPC Server Parameters `GRPCServerParameters` controls the parameters of the gRPC server used to receive OpenTelemetry data in gRPC format. diff --git a/config/cmdenv.go b/config/cmdenv.go index d96f008a7f..4daa63a031 100644 --- a/config/cmdenv.go +++ b/config/cmdenv.go @@ -43,6 +43,7 @@ type CmdEnv struct { OpAMPEndpoint string `long:"opamp-server-url" env:"REFINERY_OPAMP_ENDPOINT" description:"URL of the OpAMP server to use for remote management."` TelemetryEndpoint string `long:"telemetry-endpoint" env:"REFINERY_TELEMETRY_ENDPOINT" description:"Endpoint to send Refinery's internal telemetry to. This is separate from the Honeycomb API endpoint and is used for sending metrics about Refinery's performance."` OTelMetricsAPIKey string `long:"otel-metrics-api-key" env:"REFINERY_OTEL_METRICS_API_KEY" description:"API key for OTel metrics if being sent to Honeycomb. 
Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` + OTelMetricsAdditionalAttributes map[string]string `long:"otel-metrics-additional-attributes" env:"REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES" env-delim:"," description:"Additional attributes to add as resource attributes on all OpenTelemetry metrics emitted by Refinery. When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. When supplying via the command line, the value should be a key value pair. If multiple key-value pairs are needed, each should be supplied via its own command line flag. The key-value pairs must use ':' as the separator."` OTelTracesAPIKey string `long:"otel-traces-api-key" env:"REFINERY_OTEL_TRACES_API_KEY" description:"API key for OTel traces if being sent to Honeycomb. Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` QueryAuthToken string `long:"query-auth-token" env:"REFINERY_QUERY_AUTH_TOKEN" description:"Token for debug/management queries. 
Setting this value via a flag may expose credentials - it is recommended to use the env var or a configuration file."` AvailableMemory MemorySize `long:"available-memory" env:"REFINERY_AVAILABLE_MEMORY" description:"The maximum memory available for Refinery to use (ex: 4GiB)."` diff --git a/config/config.go b/config/config.go index 224fe07d76..02e45efa93 100644 --- a/config/config.go +++ b/config/config.go @@ -151,6 +151,8 @@ type Config interface { GetAddCountsToRoot() bool + GetSpanCounters() []SpanCounter + GetConfigMetadata() []ConfigMetadata GetSampleCacheConfig() SampleCacheConfig @@ -163,6 +165,8 @@ type Config interface { GetParentIdFieldNames() []string + GetSpanIdFieldNames() []string + GetOpAMPConfig() OpAMPConfig } diff --git a/config/config_test.go b/config/config_test.go index 30fd468d4a..04f6faaf94 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -909,6 +909,23 @@ func TestAdditionalAttributes(t *testing.T) { assert.Equal(t, map[string]string{"name": "foo", "other": "bar", "another": "OneHundred"}, c.GetAdditionalAttributes()) } +func TestOTelMetricsAdditionalAttributes(t *testing.T) { + cm := makeYAML( + "General.ConfigurationVersion", 2, + "OTelMetrics.AdditionalAttributes", map[string]string{ + "cluster.id": "my-cluster", + "environment": "production", + }, + ) + rm := makeYAML("ConfigVersion", 2) + config, rules := createTempConfigs(t, cm, rm) + c, err := getConfig([]string{"--no-validate", "--config", config, "--rules_config", rules}) + assert.NoError(t, err) + + otelCfg := c.GetOTelMetricsConfig() + assert.Equal(t, map[string]string{"cluster.id": "my-cluster", "environment": "production"}, otelCfg.AdditionalAttributes) +} + func TestHoneycombIdFieldsConfig(t *testing.T) { cm := makeYAML( "General.ConfigurationVersion", 2, diff --git a/config/file_config.go b/config/file_config.go index 43206dae90..9caf6e32bd 100644 --- a/config/file_config.go +++ b/config/file_config.go @@ -92,16 +92,21 @@ type NetworkConfig struct { type 
AccessKeyConfig struct { ReceiveKeys []string `yaml:"ReceiveKeys" default:"[]"` + ReceiveKeyIDs []string `yaml:"ReceiveKeyIDs" default:"[]"` SendKey string `yaml:"SendKey" cmdenv:"SendKey"` SendKeyMode string `yaml:"SendKeyMode" default:"none"` AcceptOnlyListedKeys bool `yaml:"AcceptOnlyListedKeys"` } -// IsAccepted checks if the given key is in the list of received keys or a configured SendKey. -// if not, it returns an error with the key truncated to 8 characters for logging. -func (a *AccessKeyConfig) IsAccepted(key string) error { +// IsAccepted checks if the given key (or its associated key ID) is authorized. +// keyID is the Honeycomb ingest key ID returned by the /1/auth endpoint; it may +// be empty if the lookup has not yet occurred or if the key is a legacy key. +// If not accepted, it returns an error with the key truncated to 8 characters for logging. +func (a *AccessKeyConfig) IsAccepted(key, keyID string) error { if a.AcceptOnlyListedKeys { - if (len(a.SendKey) > 0 && key == a.SendKey) || slices.Contains(a.ReceiveKeys, key) { + if (len(a.SendKey) > 0 && key == a.SendKey) || + slices.Contains(a.ReceiveKeys, key) || + (keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { return nil } @@ -110,10 +115,15 @@ func (a *AccessKeyConfig) IsAccepted(key string) error { return nil } +// HasKeyIDs returns true if ReceiveKeyIDs has been configured. +func (a *AccessKeyConfig) HasKeyIDs() bool { + return len(a.ReceiveKeyIDs) > 0 +} + // GetReplaceKey checks the given API key against the configuration // and possibly replaces it with the configured SendKey, if the settings so indicate. // It returns the key to use, or an error if the key is invalid given the settings. 
-func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { +func (a *AccessKeyConfig) GetReplaceKey(apiKey, keyID string) (string, error) { if a.SendKey != "" { overwriteWith := "" switch a.SendKeyMode { @@ -129,10 +139,10 @@ func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { overwriteWith = a.SendKey } case "listedonly": - // only replace keys that are listed in the `ReceiveKeys` list, + // only replace keys that are listed in the `ReceiveKeys` or `ReceiveKeyIDs` list, // otherwise use original key overwriteWith = apiKey - if slices.Contains(a.ReceiveKeys, apiKey) { + if slices.Contains(a.ReceiveKeys, apiKey) || (keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { overwriteWith = a.SendKey } case "missingonly": @@ -143,11 +153,11 @@ func (a *AccessKeyConfig) GetReplaceKey(apiKey string) (string, error) { overwriteWith = a.SendKey } case "unlisted": - // only replace nonblank keys that are NOT listed in the `ReceiveKeys` list + // only replace nonblank keys that are NOT listed in the `ReceiveKeys` or `ReceiveKeyIDs` list // otherwise use original key if apiKey != "" { overwriteWith = apiKey - if !slices.Contains(a.ReceiveKeys, apiKey) { + if !slices.Contains(a.ReceiveKeys, apiKey) && !(keyID != "" && slices.Contains(a.ReceiveKeyIDs, keyID)) { overwriteWith = a.SendKey } } @@ -189,10 +199,10 @@ func (dt *DefaultTrue) UnmarshalText(text []byte) error { } type RefineryTelemetryConfig struct { - AddRuleReasonToTrace bool `yaml:"AddRuleReasonToTrace"` - AddSpanCountToRoot *DefaultTrue `yaml:"AddSpanCountToRoot" default:"true"` // Avoid pointer woe on access, use GetAddSpanCountToRoot() instead. - AddCountsToRoot bool `yaml:"AddCountsToRoot"` - AddHostMetadataToTrace *DefaultTrue `yaml:"AddHostMetadataToTrace" default:"true"` // Avoid pointer woe on access, use GetAddHostMetadataToTrace() instead. 
+ AddRuleReasonToTrace bool `yaml:"AddRuleReasonToTrace"` + AddSpanCountToRoot *DefaultTrue `yaml:"AddSpanCountToRoot" default:"true"` // Avoid pointer woe on access, use GetAddSpanCountToRoot() instead. + AddCountsToRoot bool `yaml:"AddCountsToRoot"` + AddHostMetadataToTrace *DefaultTrue `yaml:"AddHostMetadataToTrace" default:"true"` // Avoid pointer woe on access, use GetAddHostMetadataToTrace() instead. } type TracesConfig struct { @@ -268,12 +278,13 @@ type PrometheusMetricsConfig struct { } type OTelMetricsConfig struct { - Enabled bool `yaml:"Enabled" default:"false"` - APIHost string `yaml:"APIHost" default:"https://api.honeycomb.io" cmdenv:"TelemetryEndpoint"` - APIKey string `yaml:"APIKey" cmdenv:"OTelMetricsAPIKey,HoneycombAPIKey"` - Dataset string `yaml:"Dataset" default:"Refinery Metrics"` - Compression string `yaml:"Compression" default:"gzip"` - ReportingInterval Duration `yaml:"ReportingInterval" default:"30s"` + Enabled bool `yaml:"Enabled" default:"false"` + APIHost string `yaml:"APIHost" default:"https://api.honeycomb.io" cmdenv:"TelemetryEndpoint"` + APIKey string `yaml:"APIKey" cmdenv:"OTelMetricsAPIKey,HoneycombAPIKey"` + Dataset string `yaml:"Dataset" default:"Refinery Metrics"` + Compression string `yaml:"Compression" default:"gzip"` + ReportingInterval Duration `yaml:"ReportingInterval" default:"30s"` + AdditionalAttributes map[string]string `yaml:"AdditionalAttributes" default:"{}" cmdenv:"OTelMetricsAdditionalAttributes"` } type OTelTracingConfig struct { @@ -370,6 +381,7 @@ type SpecializedConfig struct { type IDFieldsConfig struct { TraceNames []string `yaml:"TraceNames" default:"[\"trace.trace_id\",\"traceId\"]"` ParentNames []string `yaml:"ParentNames" default:"[\"trace.parent_id\",\"parentId\"]"` + SpanNames []string `yaml:"SpanNames" default:"[\"trace.span_id\",\"spanId\"]"` } // GRPCServerParameters allow you to configure the GRPC ServerParameters used @@ -597,6 +609,13 @@ func writeYAMLToFile(data any, filename string) error { // 
nil, it uses the command line arguments. // It also dumps the config and rules to the given files, if specified, which // will cause the program to exit. +// +// Return values follow an intentional two-level contract: +// - (nil, err): fatal error — config could not be loaded or has hard validation +// errors; the caller should not proceed. +// - (cfg, err): non-fatal warning — config loaded successfully but has deprecation +// or advisory warnings; the caller may log err and proceed using cfg. +// - (cfg, nil): success. func NewConfig(opts *CmdEnv, currentVersion ...string) (Config, error) { cData, rData, err := newConfigAndRules(opts) if err != nil { @@ -604,8 +623,7 @@ func NewConfig(opts *CmdEnv, currentVersion ...string) (Config, error) { } cfg, err := newFileConfig(opts, cData, rData, currentVersion...) - // only exit if we have no config at all; if it fails validation, we'll - // do the rest and return it anyway + // only exit on fatal errors (cfg == nil); non-nil cfg with err means warnings only if err != nil && cfg == nil { return nil, err } @@ -1116,6 +1134,13 @@ func (f *fileConfig) GetAddCountsToRoot() bool { return f.mainConfig.Telemetry.AddCountsToRoot } +func (f *fileConfig) GetSpanCounters() []SpanCounter { + f.mux.RLock() + defer f.mux.RUnlock() + + return f.rulesConfig.SpanCounters +} + func (f *fileConfig) GetSampleCacheConfig() SampleCacheConfig { f.mux.RLock() defer f.mux.RUnlock() @@ -1144,6 +1169,13 @@ func (f *fileConfig) GetParentIdFieldNames() []string { return f.mainConfig.IDFieldNames.ParentNames } +func (f *fileConfig) GetSpanIdFieldNames() []string { + f.mux.RLock() + defer f.mux.RUnlock() + + return f.mainConfig.IDFieldNames.SpanNames +} + func (f *fileConfig) GetConfigMetadata() []ConfigMetadata { ret := make([]ConfigMetadata, 2) ret[0] = ConfigMetadata{ diff --git a/config/file_config_test.go b/config/file_config_test.go index d09e9e3ba6..0d6bd3945f 100644 --- a/config/file_config_test.go +++ b/config/file_config_test.go @@ -2,6 
+2,7 @@ package config import ( "errors" + "fmt" "runtime" "testing" @@ -56,6 +57,7 @@ func Test_GetQueueSizesPerWorker(t *testing.T) { func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { type fields struct { ReceiveKeys []string + ReceiveKeyIDs []string SendKey string SendKeyMode string AcceptOnlyListedKeys bool @@ -71,6 +73,12 @@ func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { SendKey: "sendkey", SendKeyMode: "listedonly", } + fListedWithKeyIDs := fields{ + ReceiveKeys: []string{"key1", "key2"}, + ReceiveKeyIDs: []string{"kid1", "kid2"}, + SendKey: "sendkey", + SendKeyMode: "listedonly", + } fMissing := fields{ ReceiveKeys: []string{"key1", "key2"}, SendKey: "sendkey", @@ -81,36 +89,50 @@ func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { SendKey: "sendkey", SendKeyMode: "unlisted", } + fUnlistedWithKeyIDs := fields{ + ReceiveKeys: []string{"key1", "key2"}, + ReceiveKeyIDs: []string{"kid1", "kid2"}, + SendKey: "sendkey", + SendKeyMode: "unlisted", + } tests := []struct { name string fields fields apiKey string + keyID string want string wantErr bool }{ - {"send all known", fSendAll, "key1", "sendkey", false}, - {"send all unknown", fSendAll, "userkey", "sendkey", false}, - {"send all missing", fSendAll, "", "sendkey", false}, - {"listed known", fListed, "key1", "sendkey", false}, - {"listed unknown", fListed, "userkey", "userkey", false}, - {"listed missing", fListed, "", "", true}, - {"missing known", fMissing, "key1", "key1", false}, - {"missing unknown", fMissing, "userkey", "userkey", false}, - {"missing missing", fMissing, "", "sendkey", false}, - {"unlisted known", fUnlisted, "key1", "key1", false}, - {"unlisted unknown", fUnlisted, "userkey", "sendkey", false}, - {"unlisted missing", fUnlisted, "", "", true}, + {"send all known", fSendAll, "key1", "", "sendkey", false}, + {"send all unknown", fSendAll, "userkey", "", "sendkey", false}, + {"send all missing", fSendAll, "", "", "sendkey", false}, + {"listed known", fListed, "key1", "", 
"sendkey", false}, + {"listed unknown", fListed, "userkey", "", "userkey", false}, + {"listed missing", fListed, "", "", "", true}, + {"listed by keyID known", fListedWithKeyIDs, "unknownkey", "kid1", "sendkey", false}, + {"listed by keyID unknown", fListedWithKeyIDs, "unknownkey", "unknownkid", "unknownkey", false}, + {"listed by keyID empty", fListedWithKeyIDs, "unknownkey", "", "unknownkey", false}, + {"missing known", fMissing, "key1", "", "key1", false}, + {"missing unknown", fMissing, "userkey", "", "userkey", false}, + {"missing missing", fMissing, "", "", "sendkey", false}, + {"unlisted known", fUnlisted, "key1", "", "key1", false}, + {"unlisted unknown", fUnlisted, "userkey", "", "sendkey", false}, + {"unlisted missing", fUnlisted, "", "", "", true}, + {"unlisted by keyID known", fUnlistedWithKeyIDs, "unknownkey", "kid1", "unknownkey", false}, + {"unlisted by keyID unknown", fUnlistedWithKeyIDs, "unknownkey", "unknownkid", "sendkey", false}, + {"unlisted by keyID empty", fUnlistedWithKeyIDs, "unknownkey", "", "sendkey", false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &AccessKeyConfig{ ReceiveKeys: tt.fields.ReceiveKeys, + ReceiveKeyIDs: tt.fields.ReceiveKeyIDs, SendKey: tt.fields.SendKey, SendKeyMode: tt.fields.SendKeyMode, AcceptOnlyListedKeys: tt.fields.AcceptOnlyListedKeys, } - got, err := a.GetReplaceKey(tt.apiKey) + got, err := a.GetReplaceKey(tt.apiKey, tt.keyID) if (err != nil) != tt.wantErr { t.Errorf("AccessKeyConfig.GetReplaceKey() error = %v, wantErr %v", err, tt.wantErr) return @@ -125,6 +147,7 @@ func TestAccessKeyConfig_GetReplaceKey(t *testing.T) { func TestAccessKeyConfig_IsAccepted(t *testing.T) { type fields struct { ReceiveKeys []string + ReceiveKeyIDs []string SendKey string SendKeyMode string AcceptOnlyListedKeys bool @@ -133,24 +156,33 @@ func TestAccessKeyConfig_IsAccepted(t *testing.T) { name string fields fields key string + keyID string want error }{ - {"no keys", fields{}, "key1", nil}, - {"known 
key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key1", nil}, - {"unknown key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key2", errors.New("api key key2... not found in list of authorized keys")}, - {"reject missing key with sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "", errors.New("api key ... not found in list of authorized keys")}, - {"reject missing key without sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "", errors.New("api key ... not found in list of authorized keys")}, - {"accept sendkey", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "key2", nil}, + {"no keys", fields{}, "key1", "", nil}, + {"known key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key1", "", nil}, + {"unknown key", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "key2", "", errors.New("api key key2... not found in list of authorized keys")}, + {"reject missing key with sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "", "", errors.New("api key ... not found in list of authorized keys")}, + {"reject missing key without sendkey configured", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true}, "", "", errors.New("api key ... not found in list of authorized keys")}, + {"accept sendkey", fields{ReceiveKeys: []string{"key1"}, AcceptOnlyListedKeys: true, SendKey: "key2"}, "key2", "", nil}, + // ReceiveKeyIDs tests + {"known key id", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "kid1", nil}, + {"unknown key id", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "kid2", errors.New("api key anykey... 
not found in list of authorized keys")}, + {"key id with empty keyID param", fields{ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "anykey", "", errors.New("api key anykey... not found in list of authorized keys")}, + {"accept by key id when full key not listed", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key2", "kid1", nil}, + {"accept by full key when key id not listed", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key1", "kid2", nil}, + {"reject when neither full key nor key id match", fields{ReceiveKeys: []string{"key1"}, ReceiveKeyIDs: []string{"kid1"}, AcceptOnlyListedKeys: true}, "key2", "kid2", errors.New("api key key2... not found in list of authorized keys")}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { a := &AccessKeyConfig{ ReceiveKeys: tt.fields.ReceiveKeys, + ReceiveKeyIDs: tt.fields.ReceiveKeyIDs, SendKey: tt.fields.SendKey, SendKeyMode: tt.fields.SendKeyMode, AcceptOnlyListedKeys: tt.fields.AcceptOnlyListedKeys, } - err := a.IsAccepted(tt.key) + err := a.IsAccepted(tt.key, tt.keyID) if tt.want == nil { require.NoError(t, err) return @@ -160,6 +192,104 @@ func TestAccessKeyConfig_IsAccepted(t *testing.T) { } } +func BenchmarkAccessKeyConfig_IsAccepted(b *testing.B) { + // Generate realistic key lists + makeKeys := func(n int) []string { + keys := make([]string, n) + for i := range keys { + keys[i] = fmt.Sprintf("key-%06d", i) + } + return keys + } + + benchmarks := []struct { + name string + config AccessKeyConfig + key string + keyID string + }{ + { + name: "no_filtering", + config: AccessKeyConfig{AcceptOnlyListedKeys: false}, + key: "anykey", + keyID: "", + }, + { + name: "ReceiveKeys_10_match_last", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(10), + AcceptOnlyListedKeys: true, + }, + key: "key-000009", + keyID: "", + }, + { + name: "ReceiveKeys_100_match_last", + config: AccessKeyConfig{ + 
ReceiveKeys: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "key-000099", + keyID: "", + }, + { + name: "ReceiveKeys_100_no_match", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "unknown-key", + keyID: "", + }, + { + name: "ReceiveKeyIDs_10_match_last", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(10), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "key-000009", + }, + { + name: "ReceiveKeyIDs_100_match_last", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "key-000099", + }, + { + name: "ReceiveKeyIDs_100_no_match", + config: AccessKeyConfig{ + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "anykey", + keyID: "unknown-kid", + }, + { + name: "both_100_match_by_keyID", + config: AccessKeyConfig{ + ReceiveKeys: makeKeys(100), + ReceiveKeyIDs: makeKeys(100), + AcceptOnlyListedKeys: true, + }, + key: "unknown-key", + keyID: "key-000050", + }, + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = bm.config.IsAccepted(bm.key, bm.keyID) + } + }) + } +} + func TestCalculateSamplerKey(t *testing.T) { testCases := []struct { name string diff --git a/config/metadata/configMeta.yaml b/config/metadata/configMeta.yaml index 289f53e905..5907c23b3e 100644 --- a/config/metadata/configMeta.yaml +++ b/config/metadata/configMeta.yaml @@ -213,7 +213,7 @@ groups: valuetype: stringarray v1name: APIKeys example: "your-key-goes-here" - reload: false + reload: true validations: - type: elementType arg: string @@ -223,20 +223,45 @@ groups: will be proxied through to the upstream API directly without modifying keys. 
+ - name: ReceiveKeyIDs + type: stringarray + valuetype: stringarray + example: "your-key-id-goes-here" + firstversion: v3.2 + reload: true + validations: + - type: elementType + arg: string + summary: is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. + description: > + When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + Honeycomb ingest key ID matches an entry in this list will be accepted. + The key ID is the `id` field returned by the Honeycomb `/1/auth` + endpoint; it is distinct from the full API key value. + + This allows authorization based on key IDs rather than full key values, + which avoids storing secret key material in the configuration file. + Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. + + Note: This feature does not support legacy API keys. Only Honeycomb + Ingest Keys (which have a key ID) are compatible with this setting. + - name: AcceptOnlyListedKeys type: bool valuetype: conditional extra: nostar APIKeys default: false reload: true - validation: + validations: - type: requiredWith arg: ReceiveKeys + - type: requiredWith + arg: ReceiveKeyIDs summary: is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. description: > - If `true`, then only traffic using the keys listed in `ReceiveKeys` is - accepted. Events arriving with API keys not in the `ReceiveKeys` list - will be rejected with an HTTP `401` error. + If `true`, then only traffic using the keys listed in `ReceiveKeys` or + whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. 
@@ -673,17 +698,14 @@ groups: valuetype: map example: "pipeline.id:'12345',rollout.id:'67890'" reload: false - validations: - - type: elementType - arg: string summary: adds the provided attributes to all logs written by the Honeycomb logger. envvar: REFINERY_HONEYCOMB_LOGGER_ADDITIONAL_ATTRIBUTES commandline: logger-additional-attributes description: > - When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. - When supplying via the command line, the value should be a key value pair. - If multiple key-value pairs are needed, each should be supplied via its own command line flag. - The key-value pairs must use ':' as the separator. + When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. + When supplying via the command line, the value should be a key value pair. + If multiple key-value pairs are needed, each should be supplied via its own command line flag. + The key-value pairs must use ':' as the separator. - name: StdoutLogger title: "Stdout Logger" @@ -916,6 +938,29 @@ groups: compression costs may outweigh the benefits, in which case `none` may be used. + - name: AdditionalAttributes + type: map + valuetype: map + example: "pipeline.id:'12345',rollout.id:'67890'" + reload: false + firstversion: v3.2 + validations: + - type: elementType + arg: string + summary: adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + envvar: REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES + commandline: otel-metrics-additional-attributes + description: > + This is useful for injecting deployment-specific metadata (such as + a cluster ID or environment name) into metrics so they can be + filtered or grouped in the metrics backend. + Both keys and values must be strings. + + When supplying via a environment variable, the value should be a string of comma-separated key-value pairs. 
+ When supplying via the command line, the value should be a key value pair. + If multiple key-value pairs are needed, each should be supplied via its own command line flag. + The key-value pairs must use ':' as the separator. + - name: OTelTracing title: "OpenTelemetry Tracing" description: contains configuration for Refinery's own tracing. @@ -1723,6 +1768,21 @@ groups: as the parent ID. A trace without a `parent_id` is assumed to be a root span. + - name: SpanNames + type: stringarray + valuetype: stringarray + example: "trace.span_id,spanId" + reload: true + validations: + - type: elementType + arg: string + summary: is the list of field names to use for the span ID. + description: > + The first field in the list that is present on a span will be used + as that span's ID. This is required for `SpanCounters` entries that + set `ScopeConditions` (per-anchor subtree counting), which must + resolve each span's parent ID to a span ID in the same trace. + - name: GRPCServerParameters title: "gRPC Server Parameters" description: > diff --git a/config/metadata/rulesMeta.yaml b/config/metadata/rulesMeta.yaml index b80e4ee983..0d10217bad 100644 --- a/config/metadata/rulesMeta.yaml +++ b/config/metadata/rulesMeta.yaml @@ -738,3 +738,74 @@ groups: The best practice is to always specify `Datatype`; this avoids ambiguity, allows for more accurate comparisons, and offers a minor performance improvement. + + - name: SpanCounters + title: "Custom Span Count Configuration" + sortorder: 80 + description: > + Defines a single custom span counter. Each counter has a Key that names + the field written to a target span and an optional list of Conditions + that must all match for a span to be counted. By default the trace-wide + count is written to the root span under Key. 
When ScopeConditions is + set, every span matching ScopeConditions instead receives the count of + matching descendant spans in its own subtree; setting RootKey + alongside additionally writes the trace-wide total to the root span + under RootKey. If no root span exists when the trace is sent, root + writes go to the first non-annotation span instead. + fields: + - name: Key + type: string + validations: + - type: notempty + summary: is the field name written to the target span with the counter value. + description: > + The name of the field that will be added to each target span. Must + not be empty. When `ScopeConditions` is set, this is the field + written to each anchor span; when unset (the original behavior), + this is the field written to the root span. Keys in the + `meta.refinery.` namespace are reserved for Refinery's own metadata + and are rejected at validation. Keys starting with `meta.` produce + a warning, because int fields with a value of `0` cannot be + distinguished from a missing field on the wire — meaning + zero-count anchors will appear absent to downstream queries. + + - name: RootKey + type: string + summary: optional field name written to the root span with the trace-wide total. + description: > + Only meaningful when `ScopeConditions` is set. Setting `RootKey` + opts the root span into receiving the trace-wide total, written + under this field name (which is typically different from `Key` + so per-anchor counts and the trace-wide total can be queried + independently). If `RootKey` is left empty on a scoped counter + the root receives no write. Ignored (with a validation warning) + when `ScopeConditions` is empty — unscoped counters always write + `Key` to the root. Subject to the same reserved-namespace rules + as `Key` and counts as a separate field for cross-counter + uniqueness checks. + + - name: Conditions + type: objectarray + summary: is the list of conditions a span must satisfy to be counted. 
+ description: > + All conditions must match for a span to be counted. If empty, every + span in the trace is counted. Uses the same condition format as + rules-based sampler conditions. An anchor span (one matching + `ScopeConditions`) is also tested against Conditions like any + other span — if it matches, it counts itself. + + - name: ScopeConditions + type: objectarray + summary: is an optional list of conditions selecting per-anchor target spans. + description: > + When set, each span satisfying all of these conditions becomes an + "anchor" and receives the count of matching descendant spans in its + own subtree (including the anchor span itself when it matches + `Conditions`). When omitted, the counter writes a single trace-wide + total to the root span — the original SpanCounter behavior. Set + `RootKey` alongside `ScopeConditions` to additionally emit the + trace-wide total on the root. Nested anchors are not + special-cased: an outer anchor's count includes the descendant + subtree even if it crosses an inner anchor. Uses the same + condition format as rules-based sampler conditions; the + trace-level `has-root-span` operator is rejected at validation. 
diff --git a/config/mock.go b/config/mock.go index 785197a795..4ed6565e2a 100644 --- a/config/mock.go +++ b/config/mock.go @@ -52,6 +52,7 @@ type MockConfig struct { AdditionalErrorFields []string AddSpanCountToRoot bool AddCountsToRoot bool + SpanCounters []SpanCounter CacheOverrunStrategy string SampleCache SampleCacheConfig StressRelief StressReliefConfig @@ -59,6 +60,7 @@ type MockConfig struct { AdditionalHeaders map[string]string TraceIdFieldNames []string ParentIdFieldNames []string + SpanIdFieldNames []string CfgMetadata []ConfigMetadata CfgHash string RulesHash string @@ -415,6 +417,13 @@ func (f *MockConfig) GetAddCountsToRoot() bool { return f.AddSpanCountToRoot } +func (f *MockConfig) GetSpanCounters() []SpanCounter { + f.Mux.RLock() + defer f.Mux.RUnlock() + + return f.SpanCounters +} + func (f *MockConfig) GetSampleCacheConfig() SampleCacheConfig { f.Mux.RLock() defer f.Mux.RUnlock() @@ -447,6 +456,13 @@ func (f *MockConfig) GetParentIdFieldNames() []string { return f.ParentIdFieldNames } +func (f *MockConfig) GetSpanIdFieldNames() []string { + f.Mux.RLock() + defer f.Mux.RUnlock() + + return f.SpanIdFieldNames +} + func (f *MockConfig) GetConfigMetadata() []ConfigMetadata { f.Mux.RLock() defer f.Mux.RUnlock() diff --git a/config/sampler_config.go b/config/sampler_config.go index 2560d322e8..0cc4ce00b5 100644 --- a/config/sampler_config.go +++ b/config/sampler_config.go @@ -172,8 +172,9 @@ func (v *RulesBasedDownstreamSampler) NameMeaningfulRate() string { } type V2SamplerConfig struct { - RulesVersion int `json:"rulesversion" yaml:"RulesVersion" validate:"required,ge=2"` - Samplers map[string]*V2SamplerChoice `json:"samplers" yaml:"Samplers,omitempty" validate:"required"` + RulesVersion int `json:"rulesversion" yaml:"RulesVersion" validate:"required,ge=2"` + Samplers map[string]*V2SamplerChoice `json:"samplers" yaml:"Samplers,omitempty" validate:"required"` + SpanCounters []SpanCounter `json:"spancounters,omitempty" yaml:"SpanCounters,omitempty" 
toml:",omitempty"` } type GetSamplingFielder interface { diff --git a/config/span_counter_config.go b/config/span_counter_config.go new file mode 100644 index 0000000000..f2a5f6ff23 --- /dev/null +++ b/config/span_counter_config.go @@ -0,0 +1,269 @@
+package config
+
+import "strings"
+
+// SpanData is the minimal read-only view of a span that counter conditions
+// are evaluated against. It is satisfied by *types.Payload.
+type SpanData interface {
+	Get(key string) any
+	Exists(key string) bool
+}
+
+// SpanCounter describes one custom span count to compute for a trace.
+//
+// Unscoped (ScopeConditions empty): every span that satisfies all Conditions
+// is counted, and the trace-wide total is written to the root span under Key.
+//
+// Scoped (ScopeConditions set): each span matching ScopeConditions is an
+// anchor and receives the number of matching spans in its own subtree, the
+// anchor itself included when it matches Conditions. If RootKey is also set,
+// the trace-wide total is additionally written to the root span under
+// RootKey.
+type SpanCounter struct {
+	Key             string                        `yaml:"Key"`
+	RootKey         string                        `yaml:"RootKey,omitempty"`
+	Conditions      []*RulesBasedSamplerCondition `yaml:"Conditions,omitempty"`
+	ScopeConditions []*RulesBasedSamplerCondition `yaml:"ScopeConditions,omitempty"`
+}
+
+// Init initializes every condition, Conditions first and ScopeConditions
+// second, and returns the first error it hits. Must be called before
+// MatchesSpan.
+func (c *SpanCounter) Init() error {
+	groups := [][]*RulesBasedSamplerCondition{c.Conditions, c.ScopeConditions}
+	for _, group := range groups {
+		for _, cond := range group {
+			if err := cond.Init(); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// MatchesSpan returns true if the span satisfies all Conditions.
+// span is the span being tested; root is the root span's data (may be nil).
+func (c *SpanCounter) MatchesSpan(span SpanData, root SpanData) bool {
+	// An empty Conditions list is satisfied by every span.
+	matched := evaluateConditions(c.Conditions, span, root)
+	return matched
+}
+
+// MatchesScope reports whether span qualifies as an anchor, i.e. satisfies
+// every entry in ScopeConditions. A counter without ScopeConditions is
+// unscoped and has no anchors, so it always reports false. root is the root
+// span's data and may be nil.
+func (c *SpanCounter) MatchesScope(span SpanData, root SpanData) bool {
+	scoped := len(c.ScopeConditions) > 0
+	return scoped && evaluateConditions(c.ScopeConditions, span, root)
+}
+
+// ShouldEmitTotalOnRoot reports whether the trace-wide total is written to
+// the root span: always for an unscoped counter (whose only output is the
+// root total under Key), and for a scoped counter only when RootKey has been
+// set, which is how a scoped counter opts the root in.
+func (c *SpanCounter) ShouldEmitTotalOnRoot() bool {
+	unscoped := len(c.ScopeConditions) == 0
+	return unscoped || c.RootKey != ""
+}
+
+// EffectiveRootKey returns the field name to use when writing the trace-wide
+// total to the root span. When ScopeConditions is set, the root write uses
+// RootKey (which is also what opts the root into receiving a write at all);
+// otherwise (unscoped) it uses Key, preserving today's behavior.
+func (c *SpanCounter) EffectiveRootKey() string {
+	// Unscoped counters keep writing their total under Key.
+	if len(c.ScopeConditions) == 0 {
+		return c.Key
+	}
+	return c.RootKey
+}
+
+// evaluateConditions reports whether span satisfies every condition; an
+// empty list is trivially satisfied. For each condition, the first entry of
+// cond.Fields that is present supplies the value: names starting with
+// RootPrefix are looked up on root with the prefix removed (and skipped when
+// root is nil), all other names are looked up on span. The value is judged
+// by cond.Matches when it is set, otherwise by ConditionMatchesValue.
+func evaluateConditions(conditions []*RulesBasedSamplerCondition, span SpanData, root SpanData) bool {
+	for _, cond := range conditions {
+		var (
+			value  any
+			exists bool
+		)
+		for _, field := range cond.Fields {
+			source, name := span, field
+			if rootField, isRoot := strings.CutPrefix(field, RootPrefix); isRoot {
+				if root == nil {
+					continue
+				}
+				source, name = root, rootField
+			}
+			if source.Exists(name) {
+				value, exists = source.Get(name), true
+				break
+			}
+		}
+
+		var satisfied bool
+		if cond.Matches != nil {
+			satisfied = cond.Matches(value, exists)
+		} else {
+			satisfied = ConditionMatchesValue(cond, value, exists)
+		}
+		if !satisfied {
+			return false
+		}
+	}
+	return true
+}
+
+// ConditionMatchesValue evaluates a condition against a value when the
+// condition's Matches function has not been set (i.e. Datatype is unspecified).
+// This is exported so that sample/rules.go can share the implementation.
+func ConditionMatchesValue(condition *RulesBasedSamplerCondition, value interface{}, exists bool) bool {
+	// A missing field can only satisfy the not-exists operator.
+	if !exists {
+		return condition.Operator == NotExists
+	}
+
+	op := condition.Operator
+	switch op {
+	case Exists:
+		return true
+	case NEQ, EQ, GT, GTE, LT, LTE:
+		// Comparison operators are resolved below.
+	default:
+		// No other operator matches a field that is present.
+		return false
+	}
+
+	// A pair that compareValues cannot order satisfies no comparison operator.
+	comparison, ok := compareValues(value, condition.Value)
+	if !ok {
+		return false
+	}
+	switch op {
+	case NEQ:
+		return comparison != equal
+	case EQ:
+		return comparison == equal
+	case GT:
+		return comparison == more
+	case GTE:
+		return comparison == more || comparison == equal
+	case LT:
+		return comparison == less
+	default: // LTE
+		return comparison == less || comparison == equal
+	}
+}
+
+// Three-way comparison results returned by compareValues.
+const (
+	less  = -1
+	equal = 0
+	more  = 1
+)
+
+// compareValues compares two values of potentially mixed numeric types.
+// a is the span field value (float64, int64, bool, or string).
+// b is the condition value (float64, int64, int, bool, or string).
+func compareValues(a, b interface{}) (int, bool) {
+	// nil orders before every non-nil value; two nils are equal.
+	switch {
+	case a == nil && b == nil:
+		return equal, true
+	case a == nil:
+		return less, true
+	case b == nil:
+		return more, true
+	}
+
+	// A condition value may arrive as a plain int. Widen it to int64 rather
+	// than narrowing the span value to int, which would truncate on platforms
+	// where int is 32 bits wide.
+	if bi, ok := b.(int); ok {
+		b = int64(bi)
+	}
+
+	switch at := a.(type) {
+	case int64:
+		switch bt := b.(type) {
+		case int64:
+			return orderNumbers(at, bt)
+		case float64:
+			return orderNumbers(float64(at), bt)
+		}
+	case float64:
+		switch bt := b.(type) {
+		case int64:
+			return orderNumbers(at, float64(bt))
+		case float64:
+			return orderNumbers(at, bt)
+		}
+	case bool:
+		if bt, ok := b.(bool); ok {
+			// false orders before true.
+			switch {
+			case !at && bt:
+				return less, true
+			case at && !bt:
+				return more, true
+			}
+			return equal, true
+		}
+	case string:
+		if bt, ok := b.(string); ok {
+			return strings.Compare(at, bt), true
+		}
+	}
+
+	// Unsupported or mismatched types cannot be ordered.
+	return equal, false
+}
+
+// orderNumbers three-way compares two numbers of the same type. The second
+// result is false only when none of x < y, x > y and x == y holds, which for
+// float64 means an operand is NaN: NaN is reported as not comparable instead
+// of being treated as equal to every number.
+func orderNumbers[T int64 | float64](x, y T) (int, bool) {
+	switch {
+	case x < y:
+		return less, true
+	case x > y:
+		return more, true
+	case x == y:
+		return equal, true
+	}
+	return equal, false
+}
diff --git a/config/span_counter_config_test.go b/config/span_counter_config_test.go new file mode 100644 index 0000000000..cde4f91e37 --- /dev/null +++ b/config/span_counter_config_test.go @@ -0,0 +1,538 @@ +package config + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// spanData is a simple map-backed implementation of SpanData for tests.
+type spanData map[string]any
+
+func (s spanData) Get(key string) any     { return s[key] }
+func (s spanData) Exists(key string) bool { _, ok := s[key]; return ok }
+
+// cond builds an initialized RulesBasedSamplerCondition from a field name,
+// operator, and optional value. It calls Init() so that the Matches function
+// is set when Datatype is empty (the ConditionMatchesValue path).
+func cond(field, operator string, value any) *RulesBasedSamplerCondition {
+	c := &RulesBasedSamplerCondition{
+		Field:    field,
+		Operator: operator,
+		Value:    value,
+	}
+	if err := c.Init(); err != nil {
+		panic("cond Init: " + err.Error())
+	}
+	return c
+}
+
+// condTyped builds an initialized condition with an explicit Datatype, which
+// causes Init to set a type-coercing Matches function instead of falling
+// through to ConditionMatchesValue.
+func condTyped(field, operator string, value any, datatype string) *RulesBasedSamplerCondition {
+	c := &RulesBasedSamplerCondition{
+		Field:    field,
+		Operator: operator,
+		Value:    value,
+		Datatype: datatype,
+	}
+	if err := c.Init(); err != nil {
+		panic("condTyped Init: " + err.Error())
+	}
+	return c
+}
+
+// ----------------------------------------------------------------------------
+// compareValues
+// ----------------------------------------------------------------------------
+
+// TestCompareValues pins the three-way ordering compareValues reports for
+// each supported pairing of span value (a) and condition value (b), and that
+// mismatched types are reported as not comparable.
+func TestCompareValues(t *testing.T) {
+	tests := []struct {
+		name   string
+		a, b   any
+		want   int
+		wantOK bool
+	}{
+		// nil handling
+		{"nil==nil", nil, nil, equal, true},
+		{"nil<non-nil", nil, int64(1), less, true},
+		{"non-nil>nil", int64(1), nil, more, true},
+
+		// int64 vs int64
+		{"i64 less", int64(1), int64(2), less, true},
+		{"i64 equal", int64(3), int64(3), equal, true},
+		{"i64 more", int64(5), int64(4), more, true},
+
+		// int64 vs int
+		{"i64 vs int less", int64(1), int(2), less, true},
+		{"i64 vs int equal", int64(3), int(3), equal, true},
+		{"i64 vs int more", int64(5), int(4), more, true},
+
+		// int64 vs float64
+		{"i64 vs f64 less", int64(1), float64(1.5), less, true},
+		{"i64 vs f64 equal", int64(2), float64(2.0), equal, true},
+		{"i64 vs f64 more", int64(3), float64(2.9), more, true},
+
+		// float64 vs float64
+		{"f64 less", float64(1.1), float64(1.2), less, true},
+		{"f64 equal", float64(2.5), float64(2.5), equal, true},
+		{"f64 more", float64(3.0), float64(2.0), more, true},
+
+		// float64 vs int
+		{"f64 vs int less", float64(0.5), int(1), less, true},
+		{"f64 vs int equal", float64(2.0), int(2), equal, true},
+		{"f64 vs int more", float64(2.1), int(2), more, true},
+
+		// float64 vs int64
+		{"f64 vs i64 less", float64(0.5), int64(1), less, true},
+		{"f64 vs i64 equal", float64(2.0), int64(2), equal, true},
+		{"f64 vs i64 more", float64(3.0), int64(2), more, true},
+
+		// bool
+		{"bool false<true", false, true, less, true},
+		{"bool true>false", true, false, more, true},
+		{"bool equal", true, true, equal, true},
+
+		// string
+		{"str less", "apple", "banana", less, true},
+		{"str equal", "foo", "foo", equal, true},
+		{"str more", "zoo", "ant", more, true},
+
+		// type mismatch -> ok=false
+		{"mismatch int64 str", int64(1), "1", equal, false},
+		{"mismatch f64 str", float64(1.0), "1.0", equal, false},
+		{"mismatch bool str", true, "true", equal, false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, ok := compareValues(tc.a, tc.b)
+			assert.Equal(t, tc.wantOK, ok, "ok")
+			// The comparison result is only meaningful when the pair is comparable.
+			if tc.wantOK {
+				assert.Equal(t, tc.want, got, "comparison result")
+			}
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------- +// ConditionMatchesValue +// ---------------------------------------------------------------------------- + +func TestConditionMatchesValue(t *testing.T) { + tests := []struct { + name string + operator string + condVal any + spanVal any + exists bool + want bool + }{ + // Exists / NotExists + {"exists true", Exists, nil, "anything", true, true}, + {"exists false", Exists, nil, nil, false, false}, + {"not-exists true", NotExists, nil, nil, false, true}, + {"not-exists false", NotExists, nil, "x", true, false}, + + // EQ +
{"eq string match", EQ, "foo", "foo", true, true}, + {"eq string no-match", EQ, "foo", "bar", true, false}, + {"eq int64 match", EQ, int64(42), int64(42), true, true}, + {"eq int64 no-match", EQ, int64(42), int64(0), true, false}, + {"eq type mismatch", EQ, "1", int64(1), true, false}, // compareValues returns ok=false β†’ no match + + // NEQ + {"neq match", NEQ, "foo", "bar", true, true}, + {"neq no-match", NEQ, "foo", "foo", true, false}, + + // GT / GTE / LT / LTE + {"gt true", GT, int64(1), int64(2), true, true}, + {"gt false eq", GT, int64(1), int64(1), true, false}, + {"gte equal", GTE, int64(1), int64(1), true, true}, + {"gte more", GTE, int64(1), int64(2), true, true}, + {"gte less", GTE, int64(2), int64(1), true, false}, + {"lt true", LT, int64(2), int64(1), true, true}, + {"lt false", LT, int64(1), int64(2), true, false}, + {"lte equal", LTE, int64(2), int64(2), true, true}, + {"lte less", LTE, int64(3), int64(2), true, true}, + {"lte more", LTE, int64(1), int64(2), true, false}, + + // field does not exist with non-NotExists operator β†’ no match + {"eq field missing", EQ, "foo", nil, false, false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + c := &RulesBasedSamplerCondition{ + Operator: tc.operator, + Value: tc.condVal, + } + got := ConditionMatchesValue(c, tc.spanVal, tc.exists) + assert.Equal(t, tc.want, got) + }) + } +} + +// ---------------------------------------------------------------------------- +// SpanCounter.MatchesSpan +// ---------------------------------------------------------------------------- + +func TestMatchesSpan_NoConditions(t *testing.T) { + // A counter with no conditions matches every span. 
+ counter := SpanCounter{Key: "all"} + assert.True(t, counter.MatchesSpan(spanData{"foo": "bar"}, nil)) + assert.True(t, counter.MatchesSpan(spanData{}, nil)) +} + +func TestMatchesSpan_SingleCondition(t *testing.T) { + counter := SpanCounter{ + Key: "errors", + Conditions: []*RulesBasedSamplerCondition{cond("error", EQ, true)}, + } + + assert.True(t, counter.MatchesSpan(spanData{"error": true}, nil)) + assert.False(t, counter.MatchesSpan(spanData{"error": false}, nil)) + assert.False(t, counter.MatchesSpan(spanData{}, nil)) +} + +func TestMatchesSpan_MultipleConditionsAllMustMatch(t *testing.T) { + counter := SpanCounter{ + Key: "slow-errors", + Conditions: []*RulesBasedSamplerCondition{ + cond("error", EQ, true), + cond("duration_ms", GT, int64(500)), + }, + } + + assert.True(t, counter.MatchesSpan(spanData{"error": true, "duration_ms": int64(1000)}, nil)) + assert.False(t, counter.MatchesSpan(spanData{"error": true, "duration_ms": int64(100)}, nil)) + assert.False(t, counter.MatchesSpan(spanData{"error": false, "duration_ms": int64(1000)}, nil)) + assert.False(t, counter.MatchesSpan(spanData{}, nil)) +} + +func TestMatchesSpan_RootPrefixedField(t *testing.T) { + // "root.service.name" reads from the root span data, not the span itself. + counter := SpanCounter{ + Key: "svc-db", + Conditions: []*RulesBasedSamplerCondition{cond("root.service.name", EQ, "database")}, + } + + root := spanData{"service.name": "database"} + span := spanData{"duration_ms": int64(5)} + + assert.True(t, counter.MatchesSpan(span, root)) + assert.False(t, counter.MatchesSpan(span, spanData{"service.name": "api"})) +} + +func TestMatchesSpan_RootPrefixedField_NilRoot(t *testing.T) { + // When root is nil a root-prefixed field is never found β†’ field is absent. 
+ counter := SpanCounter{ + Key: "svc", + Conditions: []*RulesBasedSamplerCondition{cond("root.service.name", EQ, "database")}, + } + assert.False(t, counter.MatchesSpan(spanData{}, nil)) +} + +func TestMatchesSpan_MultiFieldFallback(t *testing.T) { + // When multiple fields are listed, the first one found is used. + c := &RulesBasedSamplerCondition{ + Fields: []string{"trace.trace_id", "traceId"}, + Operator: Exists, + } + if err := c.Init(); err != nil { + t.Fatal(err) + } + counter := SpanCounter{Key: "has-trace", Conditions: []*RulesBasedSamplerCondition{c}} + + assert.True(t, counter.MatchesSpan(spanData{"trace.trace_id": "abc"}, nil)) + assert.True(t, counter.MatchesSpan(spanData{"traceId": "abc"}, nil)) + assert.False(t, counter.MatchesSpan(spanData{}, nil)) +} + +func TestMatchesSpan_MultiFieldFallback_FirstWins(t *testing.T) { + // If the first field exists but evaluates to a non-match, the second field + // is not consulted β€” only the first found field is used. + c := &RulesBasedSamplerCondition{ + Fields: []string{"a", "b"}, + Operator: EQ, + Value: "yes", + } + if err := c.Init(); err != nil { + t.Fatal(err) + } + counter := SpanCounter{Key: "k", Conditions: []*RulesBasedSamplerCondition{c}} + + // "a" is found with wrong value; "b" has the right value but is not checked. + assert.False(t, counter.MatchesSpan(spanData{"a": "no", "b": "yes"}, nil)) + // Only "b" exists β†’ fallback to "b" β†’ match. + assert.True(t, counter.MatchesSpan(spanData{"b": "yes"}, nil)) +} + +func TestMatchesSpan_TypedCondition(t *testing.T) { + // When Datatype is set, Init wires up a type-coercing Matches function. + // Verify that MatchesSpan delegates to it correctly. + counter := SpanCounter{ + Key: "count-int", + Conditions: []*RulesBasedSamplerCondition{condTyped("code", EQ, 200, "int")}, + } + + // span value arrives as string "200"; the typed matcher coerces it. 
+ assert.True(t, counter.MatchesSpan(spanData{"code": "200"}, nil)) + assert.False(t, counter.MatchesSpan(spanData{"code": "404"}, nil)) +} + +func TestMatchesSpan_ExistsAndNotExists(t *testing.T) { + exists := SpanCounter{ + Key: "has-field", + Conditions: []*RulesBasedSamplerCondition{cond("db.query", Exists, nil)}, + } + notExists := SpanCounter{ + Key: "no-field", + Conditions: []*RulesBasedSamplerCondition{cond("db.query", NotExists, nil)}, + } + + withField := spanData{"db.query": "SELECT 1"} + without := spanData{} + + assert.True(t, exists.MatchesSpan(withField, nil)) + assert.False(t, exists.MatchesSpan(without, nil)) + assert.False(t, notExists.MatchesSpan(withField, nil)) + assert.True(t, notExists.MatchesSpan(without, nil)) +} + +// ---------------------------------------------------------------------------- +// SpanCounter.MatchesScope / ShouldEmitTotalOnRoot +// ---------------------------------------------------------------------------- + +func TestMatchesScope_EmptyScopeNeverMatches(t *testing.T) { + counter := SpanCounter{Key: "k"} + assert.False(t, counter.MatchesScope(spanData{"foo": "bar"}, nil)) + assert.False(t, counter.MatchesScope(spanData{}, nil)) +} + +func TestMatchesScope_AllConditionsMustMatch(t *testing.T) { + counter := SpanCounter{ + Key: "k", + ScopeConditions: []*RulesBasedSamplerCondition{ + cond("graphql.operation.name", Exists, nil), + cond("kind", EQ, "server"), + }, + } + assert.True(t, counter.MatchesScope(spanData{"graphql.operation.name": "Q", "kind": "server"}, nil)) + assert.False(t, counter.MatchesScope(spanData{"graphql.operation.name": "Q"}, nil)) + assert.False(t, counter.MatchesScope(spanData{"kind": "server"}, nil)) +} + +func TestMatchesScope_RootPrefixSupported(t *testing.T) { + counter := SpanCounter{ + Key: "k", + ScopeConditions: []*RulesBasedSamplerCondition{ + cond("root.service.name", EQ, "api"), + }, + } + assert.True(t, counter.MatchesScope(spanData{}, spanData{"service.name": "api"})) + assert.False(t, 
counter.MatchesScope(spanData{}, spanData{"service.name": "worker"})) + assert.False(t, counter.MatchesScope(spanData{}, nil)) +} + +func TestShouldEmitTotalOnRoot(t *testing.T) { + // Unscoped β†’ always emit on root (today's behavior). + assert.True(t, (&SpanCounter{Key: "k"}).ShouldEmitTotalOnRoot()) + + // Scoped, no RootKey β†’ per-anchor only, no root write. + scoped := SpanCounter{ + Key: "k", + ScopeConditions: []*RulesBasedSamplerCondition{cond("anchor", Exists, nil)}, + } + assert.False(t, scoped.ShouldEmitTotalOnRoot()) + + // Scoped + RootKey set β†’ emit total on root. + scoped.RootKey = "rk" + assert.True(t, scoped.ShouldEmitTotalOnRoot()) +} + +func TestEffectiveRootKey(t *testing.T) { + // Unscoped + no RootKey β†’ Key (today's behavior). + c := SpanCounter{Key: "k"} + assert.Equal(t, "k", c.EffectiveRootKey()) + + // Unscoped + RootKey set β†’ still Key; RootKey is ignored when unscoped. + c = SpanCounter{Key: "k", RootKey: "rk"} + assert.Equal(t, "k", c.EffectiveRootKey()) + + // Scoped + no RootKey β†’ empty string; ShouldEmitTotalOnRoot is false so + // nothing is written to the root and this value isn't consulted. + c = SpanCounter{ + Key: "k", + ScopeConditions: []*RulesBasedSamplerCondition{cond("anchor", Exists, nil)}, + } + assert.Equal(t, "", c.EffectiveRootKey()) + + // Scoped + RootKey set β†’ RootKey overrides the root write. 
+ c = SpanCounter{ + Key: "anchor_count", + RootKey: "trace_count", + ScopeConditions: []*RulesBasedSamplerCondition{cond("anchor", Exists, nil)}, + } + assert.Equal(t, "trace_count", c.EffectiveRootKey()) +} + +// ---------------------------------------------------------------------------- +// validateSpanCounterEntry (custom rules) +// ---------------------------------------------------------------------------- + +func TestValidateSpanCounterEntry_DuplicateKey(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{"Key": "k"}, seen) + assert.Empty(t, results) + results = validateSpanCounterEntry(1, map[string]any{"Key": "k"}, seen) + require.Len(t, results, 1) + assert.Equal(t, Error, results[0].Severity) + assert.Contains(t, results[0].Message, "collides") +} + +func TestValidateSpanCounterEntry_RootKeyCollidesWithKey(t *testing.T) { + seen := map[string]int{} + // Counter 0 declares Key="shared". + results := validateSpanCounterEntry(0, map[string]any{"Key": "shared"}, seen) + assert.Empty(t, results) + // Counter 1 declares RootKey="shared" β†’ collision with counter 0's Key. 
+ results = validateSpanCounterEntry(1, map[string]any{ + "Key": "other", + "RootKey": "shared", + "ScopeConditions": []any{ + map[string]any{"Field": "x", "Operator": "exists"}, + }, + }, seen) + require.NotEmpty(t, results) + var sawErr bool + for _, r := range results { + if r.Severity == Error && strings.Contains(r.Message, "RootKey") && strings.Contains(r.Message, "collides") { + sawErr = true + } + } + assert.True(t, sawErr, "RootKey colliding with another counter's Key must error") +} + +func TestValidateSpanCounterEntry_RootKeyWithoutScopeWarns(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{ + "Key": "k", + "RootKey": "rk", + }, seen) + require.NotEmpty(t, results) + var sawWarn bool + for _, r := range results { + if r.Severity == Warning && strings.Contains(r.Message, "RootKey") && strings.Contains(r.Message, "ignored") { + sawWarn = true + } + } + assert.True(t, sawWarn, "RootKey without ScopeConditions must warn") +} + +func TestValidateSpanCounterEntry_RootKeyEqualToKeyIsNoop(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{ + "Key": "k", + "RootKey": "k", + "ScopeConditions": []any{ + map[string]any{"Field": "x", "Operator": "exists"}, + }, + }, seen) + assert.Empty(t, results, "RootKey == Key is harmless and emits no diagnostics") +} + +func TestValidateSpanCounterEntry_ReservedNamespace(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{"Key": "meta.refinery.reserved"}, seen) + require.Len(t, results, 1) + assert.Equal(t, Error, results[0].Severity) + assert.Contains(t, results[0].Message, "reserved") +} + +func TestValidateSpanCounterEntry_MetaNamespaceWarning(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{"Key": "meta.custom"}, seen) + require.Len(t, results, 1) + assert.Equal(t, Warning, results[0].Severity) + assert.Contains(t, results[0].Message, 
"meta.") +} + +// TestValidateRules_ScopeConditionsThroughMetadata exercises the full +// ValidateRules path with a realistic rules document that uses +// ScopeConditions. This is a regression test for a bug where the +// metadata-driven walker tried to resolve ScopeConditions against a +// non-existent "ScopeConditions" group and produced "unknown group" / +// "unknown field" errors for every condition entry. +func TestValidateRules_ScopeConditionsThroughMetadata(t *testing.T) { + m, err := LoadRulesMetadata() + require.NoError(t, err) + + rules := map[string]any{ + "RulesVersion": 2, + "Samplers": map[string]any{ + "__default__": map[string]any{ + "DeterministicSampler": map[string]any{ + "SampleRate": 1, + }, + }, + }, + "SpanCounters": []any{ + map[string]any{ + "Key": "graphql.db_call_count", + "RootKey": "trace.db_call_total", + "ScopeConditions": []any{ + map[string]any{"Field": "graphql.field", "Operator": "exists"}, + }, + "Conditions": []any{ + map[string]any{"Field": "db.system", "Operator": "=", "Value": "postgresql", "Datatype": "string"}, + }, + }, + }, + } + + results := m.ValidateRules(rules) + for _, r := range results { + assert.NotContains(t, r.Message, "unknown group ScopeConditions", + "metadata walker should not look up ScopeConditions as a group") + assert.NotContains(t, r.Message, "unknown field ScopeConditions.", + "metadata walker should not try to validate ScopeConditions.* directly") + } + + // Same shape but with a bogus operator inside ScopeConditions β€” the + // metadata-driven "choice" validation on Operator should still catch + // this even though we routed ScopeConditions entries through the + // "Conditions" group manually. 
+ rules["SpanCounters"].([]any)[0].(map[string]any)["ScopeConditions"] = []any{ + map[string]any{"Field": "graphql.field", "Operator": "nonsense-op"}, + } + results = m.ValidateRules(rules) + var sawBadOp bool + for _, r := range results { + if r.Severity == Error && strings.Contains(r.Message, "nonsense-op") { + sawBadOp = true + } + } + assert.True(t, sawBadOp, "bogus Operator inside ScopeConditions must still fail metadata validation") +} + +func TestValidateSpanCounterEntry_HasRootSpanInScope(t *testing.T) { + seen := map[string]int{} + results := validateSpanCounterEntry(0, map[string]any{ + "Key": "k", + "ScopeConditions": []any{ + map[string]any{"Operator": HasRootSpan}, + }, + }, seen) + require.NotEmpty(t, results) + var sawErr bool + for _, r := range results { + if r.Severity == Error && strings.Contains(r.Message, HasRootSpan) { + sawErr = true + } + } + assert.True(t, sawErr, "must reject HasRootSpan in ScopeConditions") +} diff --git a/config/validate.go b/config/validate.go index 41b0c135b7..d64917308b 100644 --- a/config/validate.go +++ b/config/validate.go @@ -653,6 +653,73 @@ func (m *Metadata) ValidateRules(data map[string]any) ValidationResults { } } hasSamplers = true + case "SpanCounters": + if arr, ok := v.([]any); !ok { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters must be an array, but %v is %T", v, v), + Severity: Error, + }) + } else { + seenKeys := make(map[string]int, len(arr)) + for i, entry := range arr { + if entryMap, ok := entry.(map[string]any); ok { + // ScopeConditions reuses the structure of Conditions but + // the metadata-driven walker would look up a group named + // "ScopeConditions" (which doesn't exist). Validate + // ScopeConditions entries directly against the + // "Conditions" group and remove the key before the + // recursive Validate call. 
+ scopeKey := "ScopeConditions" + scope, hasScope := entryMap[scopeKey] + if hasScope { + delete(entryMap, scopeKey) + if scopeArr, ok := scope.([]any); ok { + for ci, cond := range scopeArr { + condMap, ok := cond.(map[string]any) + if !ok { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d].ScopeConditions[%d] must be an object, but %v is %T", i, ci, cond, cond), + Severity: Error, + }) + continue + } + subresults := m.Validate(map[string]any{"Conditions": condMap}) + for _, result := range subresults { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("Within SpanCounters[%d].ScopeConditions[%d]: %s", i, ci, result.Message), + Severity: result.Severity, + }) + } + } + } else { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d].ScopeConditions must be an array, but %v is %T", i, scope, scope), + Severity: Error, + }) + } + } + + rulesmap := map[string]any{"SpanCounters": entryMap} + subresults := m.Validate(rulesmap) + for _, result := range subresults { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("Within SpanCounters[%d]: %s", i, result.Message), + Severity: result.Severity, + }) + } + + if hasScope { + entryMap[scopeKey] = scope + } + results = append(results, validateSpanCounterEntry(i, entryMap, seenKeys)...) 
+ } else { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d] must be an object, but %v is %T", i, entry, entry), + Severity: Error, + }) + } + } + } default: results = append(results, ValidationResult{ Message: fmt.Sprintf("unknown top-level key %s", k), @@ -697,3 +764,78 @@ func (m *Metadata) ValidateRules(data map[string]any) ValidationResults { return results }
+
+// validateSpanCounterEntry runs the custom-rule validations on a single
+// SpanCounter entry:
+//   - Key and RootKey must not collide with a field name written by an
+//     earlier counter and must respect the reserved-namespace rules (both
+//     checked by validateSpanCounterFieldName). RootKey is only checked when
+//     it is non-empty and differs from Key.
+//   - A RootKey without ScopeConditions produces a warning, because an
+//     unscoped counter writes its total under Key.
+//   - The trace-level HasRootSpan operator is rejected inside ScopeConditions.
+//
+// idx is the entry's position in the SpanCounters list and is used in the
+// messages. seenKeys maps every written field name seen so far in this
+// counter list to the index that declared it, and is updated in place.
+func validateSpanCounterEntry(idx int, entry map[string]any, seenKeys map[string]int) ValidationResults {
+	var results ValidationResults
+
+	// A ScopeConditions value that is missing, not an array, or an empty
+	// array all leave scopeArr empty and count as "no scope" here.
+	scope, hasScope := entry["ScopeConditions"]
+	scopeArr, _ := scope.([]any)
+	scopeIsEmpty := !hasScope || len(scopeArr) == 0
+
+	// Non-string or absent values become "", which the name check ignores.
+	keyStr, _ := entry["Key"].(string)
+	rootKeyStr, _ := entry["RootKey"].(string)
+
+	results = append(results, validateSpanCounterFieldName(idx, "Key", keyStr, seenKeys)...)
+	if rootKeyStr != "" && rootKeyStr != keyStr {
+		results = append(results, validateSpanCounterFieldName(idx, "RootKey", rootKeyStr, seenKeys)...)
+	}
+
+	if rootKeyStr != "" && scopeIsEmpty {
+		results = append(results, ValidationResult{
+			Message:  fmt.Sprintf("SpanCounters[%d]: RootKey is set but ScopeConditions is empty; RootKey is ignored — the root write uses Key", idx),
+			Severity: Warning,
+		})
+	}
+
+	for ci, cond := range scopeArr {
+		condMap, ok := cond.(map[string]any)
+		if !ok {
+			// Entries that are not objects are skipped here.
+			continue
+		}
+		if op, ok := condMap["Operator"].(string); ok && op == HasRootSpan {
+			results = append(results, ValidationResult{
+				Message:  fmt.Sprintf("SpanCounters[%d].ScopeConditions[%d]: operator %q is trace-level and cannot be used as a span filter", idx, ci, HasRootSpan),
+				Severity: Error,
+			})
+		}
+	}
+
+	return results
+}
+
+// validateSpanCounterFieldName checks one written-field-name (Key or +// RootKey) for cross-counter uniqueness and reserved-namespace violations. +// seenKeys is updated in place. +func validateSpanCounterFieldName(idx int, fieldLabel, name string, seenKeys map[string]int) ValidationResults { + if name == "" { + return nil + } + var results ValidationResults + if prev, exists := seenKeys[name]; exists && prev != idx { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d]: %s %q collides with a Key or RootKey already declared at SpanCounters[%d]", idx, fieldLabel, name, prev), + Severity: Error, + }) + } else { + seenKeys[name] = idx + } + if strings.HasPrefix(name, "meta.refinery.") { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d]: %s %q uses the reserved meta.refinery.
namespace", idx, fieldLabel, name), + Severity: Error, + }) + } else if strings.HasPrefix(name, "meta.") { + results = append(results, ValidationResult{ + Message: fmt.Sprintf("SpanCounters[%d]: %s %q starts with meta.; int fields with value 0 cannot be distinguished from missing", idx, fieldLabel, name), + Severity: Warning, + }) + } + return results +} diff --git a/config_complete.yaml b/config_complete.yaml index beb7eaf4ba..21e1a24751 100644 --- a/config_complete.yaml +++ b/config_complete.yaml @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created on 2026-02-25 at 20:49:27 UTC from ../../config.yaml using a template generated on 2026-02-25 at 20:49:24 UTC +# created on 2026-04-09 at 22:21:32 UTC from ../../config.yaml using a template generated on 2026-04-09 at 22:21:28 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -166,16 +166,35 @@ AccessKeys: ## will be proxied through to the upstream API directly without modifying ## keys. ## - ## Not eligible for live reload. + ## Eligible for live reload. # ReceiveKeys: # - your-key-goes-here + ## ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will + ## treat specially. + ## + ## When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + ## Honeycomb ingest key ID matches an entry in this list will be + ## accepted. The key ID is the `id` field returned by the Honeycomb + ## `/1/auth` endpoint; it is distinct from the full API key value. + ## This allows authorization based on key IDs rather than full key + ## values, which avoids storing secret key material in the configuration + ## file. Both `ReceiveKeys` and `ReceiveKeyIDs` may be used + ## simultaneously. + ## Note: This feature does not support legacy API keys. Only Honeycomb + ## Ingest Keys (which have a key ID) are compatible with this setting. 
+ ## + ## Eligible for live reload. + # ReceiveKeyIDs: + # - your-key-id-goes-here + ## AcceptOnlyListedKeys is a boolean flag that causes events arriving ## with API keys not in the `ReceiveKeys` list to be rejected. ## - ## If `true`, then only traffic using the keys listed in `ReceiveKeys` is - ## accepted. Events arriving with API keys not in the `ReceiveKeys` list - ## will be rejected with an HTTP `401` error. + ## If `true`, then only traffic using the keys listed in `ReceiveKeys` or + ## whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + ## with API keys not in either list will be rejected with an HTTP `401` + ## error. ## If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. ## This setting is applied **before** the `SendKey` and `SendKeyMode` ## settings. @@ -695,6 +714,24 @@ OTelMetrics: ## Options: none gzip # Compression: gzip + ## AdditionalAttributes adds the provided attributes as resource + ## attributes on all OpenTelemetry metrics emitted by Refinery. + ## + ## This is useful for injecting deployment-specific metadata (such as a + ## cluster ID or environment name) into metrics so they can be filtered + ## or grouped in the metrics backend. Both keys and values must be + ## strings. + ## When supplying via an environment variable, the value should be a + ## string of comma-separated key-value pairs. When supplying via the + ## command line, the value should be a key-value pair. If multiple + ## key-value pairs are needed, each should be supplied via its own + ## command line flag. The key-value pairs must use ':' as the separator. + ## + ## Not eligible for live reload. 
+ # AdditionalAttributes: + # pipeline.id: '12345' + # rollout.id: '67890' + ########################### ## OpenTelemetry Tracing ## ########################### diff --git a/go.mod b/go.mod index 6cbe637eef..eb1c581408 100644 --- a/go.mod +++ b/go.mod @@ -15,68 +15,105 @@ require ( github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/honeycombio/dynsampler-go v0.6.4 github.com/honeycombio/hpsf v0.14.0 - github.com/honeycombio/husky v0.41.0 - github.com/honeycombio/libhoney-go v1.26.0 + github.com/honeycombio/husky v0.43.1 + github.com/honeycombio/libhoney-go v1.27.1 github.com/jessevdk/go-flags v1.6.1 github.com/jonboulle/clockwork v0.5.0 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.18.2 - github.com/open-telemetry/opamp-go v0.22.0 - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0 + github.com/klauspost/compress v1.18.6 + github.com/open-telemetry/opamp-go v0.23.0 + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0 github.com/panmari/cuckoofilter v1.0.6 - github.com/pelletier/go-toml/v2 v2.2.4 + github.com/pelletier/go-toml/v2 v2.3.0 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.23.2 github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9 - github.com/redis/go-redis/v9 v9.17.3 + github.com/redis/go-redis/v9 v9.19.0 github.com/sirupsen/logrus v1.9.4 github.com/sourcegraph/conc v0.3.0 github.com/stretchr/testify v1.11.1 + github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 github.com/tidwall/gjson v1.18.0 - github.com/tinylib/msgp v1.6.2 - github.com/valyala/fastjson v1.6.7 + github.com/tinylib/msgp v1.6.4 + github.com/valyala/fastjson v1.6.10 github.com/vmihailenco/msgpack/v5 v5.4.1 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 - go.opentelemetry.io/otel v1.40.0 - 
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0 - go.opentelemetry.io/otel/metric v1.40.0 - go.opentelemetry.io/otel/sdk v1.40.0 - go.opentelemetry.io/otel/sdk/metric v1.40.0 - go.opentelemetry.io/otel/trace v1.40.0 - go.opentelemetry.io/proto/otlp v1.9.0 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + go.opentelemetry.io/proto/otlp v1.10.0 golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b - google.golang.org/grpc v1.78.0 + google.golang.org/grpc v1.80.0 google.golang.org/protobuf v1.36.11 gopkg.in/yaml.v3 v3.0.1 ) require ( + dario.cat/mergo v1.0.2 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/containerd/platforms v0.2.1 // indirect + github.com/cpuguy83/dockercfg v0.3.2 // indirect + github.com/distribution/reference v0.6.0 // indirect + github.com/docker/go-connections v0.6.0 // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/ebitengine/purego v0.10.0 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/golang/groupcache 
v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/go-licenses/v2 v2.0.1 // indirect github.com/google/licenseclassifier/v2 v2.0.0 // indirect github.com/gorilla/websocket v1.5.3 // indirect - github.com/hashicorp/go-version v1.8.0 // indirect + github.com/hashicorp/go-version v1.9.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/magiconair/properties v1.8.10 // indirect + github.com/mdelapenya/tlscert v0.2.0 // indirect github.com/michel-laterman/proxy-connect-dialer-go v0.1.0 // indirect - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/go-archive v0.2.0 // indirect + github.com/moby/moby/api v1.54.1 // indirect + github.com/moby/moby/client v0.4.0 // indirect + github.com/moby/patternmatcher v0.6.1 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect + github.com/moby/sys/user v0.4.0 // indirect + github.com/moby/sys/userns v0.1.0 // indirect + github.com/moby/term v0.5.2 // indirect + github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0 // indirect github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.142.0 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.1.1 // indirect github.com/otiai10/copy v1.10.0 // indirect github.com/philhofer/fwd v1.2.0 // indirect + github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/sergi/go-diff v1.2.0 // indirect + github.com/shirou/gopsutil/v4 v4.26.3 // indirect github.com/spf13/cobra v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/objx v0.5.2 // indirect + github.com/stretchr/objx v0.5.3 // indirect + github.com/testcontainers/testcontainers-go v0.42.0 // indirect + github.com/tklauser/go-sysconf 
v0.3.16 // indirect + github.com/tklauser/numcpus v0.11.0 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/collector/featuregate v1.51.0 // indirect + go.opentelemetry.io/collector/featuregate v1.57.0 // indirect + go.opentelemetry.io/collector/pdata/xpdata v0.151.0 // indirect go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0 // indirect go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0 // indirect + go.uber.org/atomic v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/sync v0.20.0 // indirect gopkg.in/alexcesaro/statsd.v2 v2.0.0 // indirect k8s.io/klog/v2 v2.90.1 // indirect ) @@ -86,7 +123,6 @@ require ( github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 // indirect - github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 // indirect github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52 // indirect @@ -95,7 +131,7 @@ require ( github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/google/uuid v1.6.0 - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -107,17 +143,15 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect 
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect - go.opentelemetry.io/collector/pdata v1.51.0 + go.opentelemetry.io/collector/pdata v1.57.0 go.uber.org/multierr v1.11.0 // indirect - golang.org/x/mod v0.31.0 - golang.org/x/net v0.49.0 // indirect - golang.org/x/sys v0.40.0 // indirect - golang.org/x/text v0.33.0 // indirect - golang.org/x/tools v0.40.0 - google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect + golang.org/x/mod v0.35.0 + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + golang.org/x/tools v0.43.0 + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d // indirect ) tool github.com/google/go-licenses/v2 - -replace go.opentelemetry.io/proto/otlp => github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat diff --git a/go.sum b/go.sum index 3eb28ed3bf..ebf3dece30 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,15 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= +dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DataDog/zstd v1.5.7 
h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= @@ -21,7 +29,19 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= +github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= +github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= +github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 
+github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= +github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/creasty/defaults v1.8.0 h1:z27FJxCAa0JKt3utc0sCImAEb+spPucmKoOdLHvHYKk= github.com/creasty/defaults v1.8.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -30,12 +50,18 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33 h1:ucRHb6/lvW/+mTEIGbvhcYU3S8+uSNkuMjx/qZFfhtM= github.com/dgryski/go-metro v0.0.0-20250106013310-edb8663e5e33/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= -github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= -github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dgryski/go-wyhash v0.0.0-20191203203029-c4841ae36371 h1:bz5ApY1kzFBvw3yckuyRBCtqGvprWrKswYK468nm+Gs= github.com/dgryski/go-wyhash v0.0.0-20191203203029-c4841ae36371/go.mod h1:/ENMIO1SQeJ5YQeUWWpbX8f+bS8INHrrhFjXgEqi4LA= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= +github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= +github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= +github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/go-units v0.5.0 
h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -66,6 +92,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -91,6 +119,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -112,22 +141,20 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/pyroscope-go/godeltaprof v0.1.9 h1:c1Us8i6eSmkW+Ez05d3co8kasnuOY813tbMN8i/a3Og= github.com/grafana/pyroscope-go/godeltaprof v0.1.9/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= -github.com/hashicorp/go-version v1.8.0 h1:KAkNb1HAiZd1ukkxDFGmokVZe1Xy9HG6NUp+bPle2i4= -github.com/hashicorp/go-version v1.8.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/hashicorp/go-version v1.9.0 h1:CeOIz6k+LoN3qX9Z0tyQrPtiB1DFYRPfCIBtaXPSCnA= +github.com/hashicorp/go-version v1.9.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/honeycombio/dynsampler-go v0.6.4 h1:EM3FXN2Lfmso41MRMmSvRynMrz+AHiRffaWHPf4ZHDs= github.com/honeycombio/dynsampler-go v0.6.4/go.mod h1:M5YYNOfxRrBlEWDatTlHMYo5F7GjwVnptx5z+uXIVMo= github.com/honeycombio/hpsf v0.14.0 h1:LeQbDuT+aVmiJnWp9Kqb9Qqz5OZcjDk85RMzzwKtCKI= github.com/honeycombio/hpsf v0.14.0/go.mod h1:VyPjyn1GViOiCrpBbPZCkEJnuDuSTUpU8LV5CWVTQm4= -github.com/honeycombio/husky v0.41.0 
h1:6iuC3FJpU2xZUveLFGAWvDP/Xp9Vnt1vMgwu2UCeQfA= -github.com/honeycombio/husky v0.41.0/go.mod h1:kgwFQfPCC82f5BxuBb8BAuuC1Q7e5NK7EVsjcjztuXo= -github.com/honeycombio/libhoney-go v1.26.0 h1:fdwS7c/5h6ifJqQZ178nm4UEZha04GTbwJMZ7xkShhk= -github.com/honeycombio/libhoney-go v1.26.0/go.mod h1:cR+t7pq9heP00+1/+TNWCrAfjSA74xKWI8YGOANlzYY= -github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat h1:g6pUF6IZVLG93vZbUefK0qF20CGx0zf0q3n3Fw4gv1s= -github.com/honeycombio/opentelemetry-proto-go/otlp v1.9.0-compat/go.mod h1:ZyEcAltAA7tCBVo5o+5klmG2l+43E1fjpxGxvOIskic= +github.com/honeycombio/husky v0.43.1 h1:HRaSO59KujOsYNQO1Qkn8YFboizheTJcKlBvVhClDe8= +github.com/honeycombio/husky v0.43.1/go.mod h1:lQ1VzGZxeYPCr4zxmak1lVe29HJFqJ6bQXWCl0ZqlNg= +github.com/honeycombio/libhoney-go v1.27.1 h1:79FR19fVpaeDMqTDfpXtMxd90vzsxhZnIOSysMrUSQQ= +github.com/honeycombio/libhoney-go v1.27.1/go.mod h1:qLZO8Q3ep/hISEoVC7m8N9ZOvn2eqaGdoJg9XXXasqM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4= @@ -136,8 +163,10 @@ github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbd github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= -github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/compress v1.18.6 h1:2jupLlAwFm95+YDR+NwD2MEfFO9d4z4Prjl1XXDjuao= +github.com/klauspost/compress v1.18.6/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= +github.com/klauspost/cpuid/v2 v2.2.10 
h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -147,8 +176,32 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= +github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI= +github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o= github.com/michel-laterman/proxy-connect-dialer-go v0.1.0 h1:Q8asukpmyrEheocd+R+6YEI4jcm62sHHalgTMG+LoLw= github.com/michel-laterman/proxy-connect-dialer-go v0.1.0/go.mod h1:HTlVkRAqzTRPYbWxgAiwMT9HRZMOqP3Mx7+toa3yJjc= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= +github.com/moby/go-archive v0.2.0 h1:zg5QDUM2mi0JIM9fdQZWC7U8+2ZfixfTYoHL7rWUcP8= +github.com/moby/go-archive v0.2.0/go.mod h1:mNeivT14o8xU+5q1YnNrkQVpK+dnNe/K6fHqnTg4qPU= +github.com/moby/moby/api v1.54.1 h1:TqVzuJkOLsgLDDwNLmYqACUuTehOHRGKiPhvH8V3Nn4= 
+github.com/moby/moby/api v1.54.1/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs= +github.com/moby/moby/client v0.4.0 h1:S+2XegzHQrrvTCvF6s5HFzcrywWQmuVnhOXe2kiWjIw= +github.com/moby/moby/client v0.4.0/go.mod h1:QWPbvWchQbxBNdaLSpoKpCdf5E+WxFAgNHogCWDoa7g= +github.com/moby/patternmatcher v0.6.1 h1:qlhtafmr6kgMIJjKJMDmMWq7WLkKIo23hsrpR3x084U= +github.com/moby/patternmatcher v0.6.1/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= +github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= +github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= +github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= +github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -157,30 +210,36 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/open-telemetry/opamp-go v0.22.0 
h1:7UnsQgFFS7ffM09JQk+9aGVBAAlsLfcooZ9xvSYwxWM= -github.com/open-telemetry/opamp-go v0.22.0/go.mod h1:339N71soCPrhHywbAcKUZJDODod581ZOxCpTkrl3zYQ= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.145.0 h1:lbxy2bYh3v0YIyqd/JVttEwYlC7yU5o3JU2N/m5Qnq8= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.145.0/go.mod h1:kGlLjX8CJSE+9SfLARgaXTFBuAvNadjLvPsHO7fcVeE= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0 h1:0ithmsGyVtjzODmAPp9pkxA4IlnYpyeXmDWrryTkHNo= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.145.0/go.mod h1:r+K/aCWpUCDDM5Gisznf9ZQjpZcyFr84CuATA9486JQ= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0 h1:sB4yuYx45zig1ceQ+kmrEYy0xMZ+mGagwYIFtJkkU1w= -github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.145.0/go.mod h1:uLhceuH7ZtiVxk+B0MHI0vhJG2Y4aOzT/hrV6c5KjVU= +github.com/open-telemetry/opamp-go v0.23.0 h1:k7h7w/muprut9/DAhUC4anX4v7hIdgO02gIsSjV4uq0= +github.com/open-telemetry/opamp-go v0.23.0/go.mod h1:DIIVdkLefdqPW5L+4I2twmAicVrTB0Bp5XJAfedZzAM= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.151.0 h1:M+d61Wo6zhJoAWKDVUJeeZa46hepah1s+zKgfPlD0ng= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.151.0/go.mod h1:UjELBH4CzaY+y3fHR4RpenHJ3277jBYxTC4xEa5Sxfk= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0 h1:JbnrAMGHqSW+jvJRL9RS7JGMrWpXqGPXdkAk6JoMHV4= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.151.0/go.mod h1:xoSnCUue2dtnuMyJd/1xz7JaQ2G7eweNxM0Laj1uuVc= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0 h1:c8+upXGwDxokINkuChSD7INYHlpcCAyQs2aXpx4rzSs= +github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.151.0/go.mod h1:Ln3K9yJgPAwEUXqCoR8htVs6bk3cyj6zIPOyM/LhiPo= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling 
v0.142.0 h1:lFowWhr/qx5Gm2X8H0BbG87xZh/e+4S0PQw8HQO5D4Y= github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.142.0/go.mod h1:JybcaNLHHzJQh690eSp+KDbLrxB1+AhKNLlibqrogt4= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= +github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/otiai10/copy v1.10.0 h1:znyI7l134wNg/wDktoVQPxPkgvhDfGCYUasey+h0rDQ= github.com/otiai10/copy v1.10.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM= github.com/panmari/cuckoofilter v1.0.6 h1:WKb1aSj16h22x0CKVtTCaRkJiCnVGPLEMGbNY8xwXf8= github.com/panmari/cuckoofilter v1.0.6/go.mod h1:bKADbQPGbN6TxUvo/IbMEIUbKuASnpsOvrLTgpSX0aU= -github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= -github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM= +github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat 
v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU= +github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -194,14 +253,16 @@ github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9 h1:bsUq1dX0N8A github.com/rcrowley/go-metrics v0.0.0-20250401214520-65e299d6c5c9/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rdleal/go-priorityq v0.0.0-20240324224830-28716009213d h1:OuC714/HtVeMJo6Y1mRkeuDmu3t+F0cgh6qPDGqLmqI= github.com/rdleal/go-priorityq v0.0.0-20240324224830-28716009213d/go.mod h1:X4AAZOixX/7z5rgQkIkMa72A0++MLRke9nipxYUg+8E= -github.com/redis/go-redis/v9 v9.17.3 h1:fN29NdNrE17KttK5Ndf20buqfDZwGNgoUr9qjl1DQx4= -github.com/redis/go-redis/v9 v9.17.3/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370= +github.com/redis/go-redis/v9 v9.19.0 h1:XPVaaPSnG6RhYf7p+rmSa9zZfeVAnWsH5h3lxthOm/k= +github.com/redis/go-redis/v9 v9.19.0/go.mod h1:v/M13XI1PVCDcm01VtPFOADfZtHf8YW3baQf57KlIkA= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc= +github.com/shirou/gopsutil/v4 
v4.26.3/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= @@ -213,8 +274,8 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= -github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= +github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -222,6 +283,10 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/testcontainers/testcontainers-go v0.42.0 h1:He3IhTzTZOygSXLJPMX7n44XtK+qhjat1nI9cneBbUY= +github.com/testcontainers/testcontainers-go v0.42.0/go.mod h1:vZjdY1YmUA1qEForxOIOazfsrdyORJAbhi0bp8plN30= +github.com/testcontainers/testcontainers-go/modules/redis v0.42.0 h1:id/6LH8ZeDrtAUVSuNvZUAJ1kVpb82y1pr9yweAWsRg= 
+github.com/testcontainers/testcontainers-go/modules/redis v0.42.0/go.mod h1:uF0jI8FITagQpBNOgweGBmPf6rP4K0SeL1XFPbsZSSY= github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -229,56 +294,70 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tinylib/msgp v1.6.2 h1:D40LN895O9HJpN8n5Ksqk+abl7zw6RtizDwgRCE7hXk= -github.com/tinylib/msgp v1.6.2/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= -github.com/valyala/fastjson v1.6.7 h1:ZE4tRy0CIkh+qDc5McjatheGX2czdn8slQjomexVpBM= -github.com/valyala/fastjson v1.6.7/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY= +github.com/tinylib/msgp v1.6.4 h1:mOwYbyYDLPj35mkA2BjjYejgJk9BuHxDdvRnb6v2ZcQ= +github.com/tinylib/msgp v1.6.4/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= +github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYICU0nA= +github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= +github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= +github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ= +github.com/valyala/fastjson v1.6.10 h1:/yjJg8jaVQdYR3arGxPE2X5z89xrlhS0eGXdv+ADTh4= +github.com/valyala/fastjson v1.6.10/go.mod h1:e6FubmQouUNP73jtMLmcbxS6ydWIpOfhz34TSfO3JaE= github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= github.com/vmihailenco/tagparser/v2 
v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= +github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/collector/featuregate v1.51.0 h1:dxJuv/3T84dhNKp7fz5+8srHz1dhquGzDpLW4OZTFBw= -go.opentelemetry.io/collector/featuregate v1.51.0/go.mod h1:/1bclXgP91pISaEeNulRxzzmzMTm4I5Xih2SnI4HRSo= -go.opentelemetry.io/collector/internal/testutil v0.145.0 h1:H/KL0GH3kGqSMKxZvnQ0B0CulfO9xdTg4DZf28uV7fY= -go.opentelemetry.io/collector/internal/testutil v0.145.0/go.mod h1:YAD9EAkwh/l5asZNbEBEUCqEjoL1OKMjAMoPjPqH76c= -go.opentelemetry.io/collector/pdata v1.51.0 h1:DnDhSEuDXNdzGRB7f6oOfXpbDApwBX3tY+3K69oUrDA= -go.opentelemetry.io/collector/pdata v1.51.0/go.mod h1:GoX1bjKDR++mgFKdT7Hynv9+mdgQ1DDXbjs7/Ww209Q= -go.opentelemetry.io/collector/pdata/pprofile v0.145.0 h1:ASMKpoqokf8HhzjoeMKZf0K6UXLhufVwNXH0sSuUn5w= -go.opentelemetry.io/collector/pdata/pprofile v0.145.0/go.mod h1:a60GC7wQPhLAixWzKbbP51QLwwc+J0Cmp4SurOlhGUk= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 
h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= -go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= -go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0 h1:9y5sHvAxWzft1WQ4BwqcvA+IFVUJ1Ya75mSAUnFEVwE= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.40.0/go.mod h1:eQqT90eR3X5Dbs1g9YSM30RavwLF725Ris5/XSXWvqE= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 h1:QKdN8ly8zEMrByybbQgv8cWBcdAarwmIPZ6FThrWXJs= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0/go.mod h1:bTdK1nhqF76qiPoCCdyFIV+N/sRHYXYCTQc+3VCi3MI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0 h1:wVZXIWjQSeSmMoxF74LzAnpVQOAFDo3pPji9Y4SOFKc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.40.0/go.mod h1:khvBS2IggMFNwZK/6lEeHg/W57h/IX6J4URh57fuI40= -go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= -go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= -go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8= -go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE= -go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw= -go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg= -go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= -go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= +go.opentelemetry.io/collector/featuregate v1.57.0 h1:KPDSUKYn6MHwgyGRSGPPcW/G96HH93pxuvvPwM+R8nY= +go.opentelemetry.io/collector/featuregate v1.57.0/go.mod 
h1:4ga1QBMPEejXXmpyJS8lmaRpknJ3Lb9Bvk6e420bUFU= +go.opentelemetry.io/collector/internal/testutil v0.151.0 h1:CFjDItLuqzblItOsnK6IPSdrsOaZCaDjYpB8qWG+XHI= +go.opentelemetry.io/collector/internal/testutil v0.151.0/go.mod h1:Jkjs6rkqs973LqgZ0Fe3zrokQRKULYXPIf4HuqStiEE= +go.opentelemetry.io/collector/pdata v1.57.0 h1:oDWBMjEIqyJO3GJEB+iwqxj47rxDK19OKzwaFEaE4sg= +go.opentelemetry.io/collector/pdata v1.57.0/go.mod h1:wZojinP6mNhLXudH8QXx/bjWzOsKMxi/FXwnk+12G/w= +go.opentelemetry.io/collector/pdata/pprofile v0.151.0 h1:hsU0+DpkvhJh3xL1Y8CX2vAPdLMoJLiw+C+rAMsaxZc= +go.opentelemetry.io/collector/pdata/pprofile v0.151.0/go.mod h1:5zfGTQqRuaKyh2SRaZi4SV4nSD8TzY1kYoOjniOD3uk= +go.opentelemetry.io/collector/pdata/xpdata v0.151.0 h1:trsLPS6jCkwVwJyKxbPqQerAiMpKkQrQLEGIEcyC6yM= +go.opentelemetry.io/collector/pdata/xpdata v0.151.0/go.mod h1:0vID3D52DGVoypLa8S7izv41ElTBEgtAbc0HmB4KF60= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 h1:0Qx7VGBacMm9ZENQ7TnNObTYI4ShC+lHI16seduaxZo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0/go.mod h1:Sje3i3MjSPKTSPvVWCaL8ugBzJwik3u4smCjUeuupqg= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 h1:w1K+pCJoPpQifuVpsKamUdn9U0zM3xUziVOqsGksUrY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0/go.mod h1:HBy4BjzgVE8139ieRI75oXm3EcDN+6GhD88JT1Kjvxg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod 
h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0 h1:40vBjolEOioNBl8zPj1wxqlA7kJ82RxR4HnUv7W8zRI= go.opentelemetry.io/proto/otlp/collector/profiles/v1development v0.2.0/go.mod h1:4wAsc1dEVb4D1ZykBNC9AriTU9uLYtmziLrB+7G4lb4= go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0 h1:yXinc284C6bmzA1r9jk7MxAhrBIIOH3qwmqwBmylZrA= go.opentelemetry.io/proto/otlp/profiles/v1development v0.2.0/go.mod h1:ygxocDWPB6Y6bySAjxmHyTebjAJ8jcEUAZc03gu1pxk= -go.opentelemetry.io/proto/slim/otlp v1.9.0 h1:fPVMv8tP3TrsqlkH1HWYUpbCY9cAIemx184VGkS6vlE= -go.opentelemetry.io/proto/slim/otlp v1.9.0/go.mod h1:xXdeJJ90Gqyll+orzUkY4bOd2HECo5JofeoLpymVqdI= -go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0 
h1:o13nadWDNkH/quoDomDUClnQBpdQQ2Qqv0lQBjIXjE8= -go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.2.0/go.mod h1:Gyb6Xe7FTi/6xBHwMmngGoHqL0w29Y4eW8TGFzpefGA= -go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0 h1:EiUYvtwu6PMrMHVjcPfnsG3v+ajPkbUeH+IL93+QYyk= -go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.2.0/go.mod h1:mUUHKFiN2SST3AhJ8XhJxEoeVW12oqfXog0Bo8W3Ec4= +go.opentelemetry.io/proto/slim/otlp v1.10.0 h1:iR97Vs/ZDR+y9TfuP9b1XBtdPWeC+OMslIBmhcLU7jM= +go.opentelemetry.io/proto/slim/otlp v1.10.0/go.mod h1:lV9250stpjYLPNA5viFabIgP2QlUGRT1GdTgAf8SIUk= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0 h1:RUF5rO0hAlgiJt1fzQVzcVs3vZVNHIcMLgOgG4rWNcQ= +go.opentelemetry.io/proto/slim/otlp/collector/profiles/v1development v0.3.0/go.mod h1:I89cynRj8y+383o7tEQVg2SVA6SRgDVIouWPUVXjx0U= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0 h1:CQvJSldHRUN6Z8jsUeYv8J0lXRvygALXIzsmAeCcZE0= +go.opentelemetry.io/proto/slim/otlp/profiles/v1development v0.3.0/go.mod h1:xSQ+mEfJe/GjK1LXEyVOoSI1N9JV9ZI923X5kup43W4= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -287,64 +366,71 @@ go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto 
v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= -golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= +golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= -golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= golang.org/x/oauth2 
v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= -golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= -gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 h1:merA0rdPeUV3YIIfHHcH4qBkiQAc1nfCKSI7lB4cV2M= -google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409/go.mod h1:fl8J1IvUjCilwZzQowmw2b7HQB2eAuYBabMXzWurF+I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d h1:wT2n40TBqFY6wiwazVK9/iTWbsQrgk5ZfCSVFLO9LQA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= 
+google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -367,7 +453,11 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= +gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= k8s.io/klog/v2 v2.90.1 h1:m4bYOKall2MmOiRaR1J+We67Do7vm9KiQVlT96lnHUw= k8s.io/klog/v2 v2.90.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= +pgregory.net/rapid v1.2.0 h1:keKAYRcjm+e1F0oAuU5F5+YPAWcyxNNRK2wud503Gnk= +pgregory.net/rapid v1.2.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= diff --git a/internal/peer/peers_test.go b/internal/peer/peers_test.go index 344c794eb8..3fca0e9f3c 100644 --- a/internal/peer/peers_test.go +++ b/internal/peer/peers_test.go @@ -12,6 +12,7 @@ import ( "github.com/facebookgo/inject" "github.com/facebookgo/startstop" "github.com/honeycombio/refinery/config" + "github.com/honeycombio/refinery/internal/redistest" "github.com/honeycombio/refinery/logger" "github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -87,10 +88,14 @@ func newPeers(c 
config.Config) (Peers, error) { } func TestPeerShutdown(t *testing.T) { + host, port := redistest.Endpoint(t) c := &config.MockConfig{ GetPeerListenAddrVal: "0.0.0.0:8081", PeerManagementType: "redis", PeerTimeout: 5 * time.Second, + GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ + Host: host + ":" + port, + }, } p, err := newPeers(c) diff --git a/internal/redistest/redistest.go b/internal/redistest/redistest.go new file mode 100644 index 0000000000..c67463935e --- /dev/null +++ b/internal/redistest/redistest.go @@ -0,0 +1,64 @@ +// Package redistest provides a shared Redis testcontainer for tests that need +// a real Redis instance. One container is started per test binary on first +// call and reused across tests; the testcontainers Reaper cleans it up when +// the process exits. +package redistest + +import ( + "context" + "net" + "net/url" + "sync" + "testing" + "time" + + "github.com/testcontainers/testcontainers-go/modules/redis" +) + +const image = "redis:6.2" + +var ( + once sync.Once + sharedHost string + sharedPort string + startup error +) + +// Endpoint returns the host and port of a shared Redis container, starting it +// on first call. The container lives for the duration of the test process. 
+func Endpoint(t testing.TB) (host, port string) { + t.Helper() + once.Do(start) + if startup != nil { + t.Fatalf("redistest: failed to start Redis container: %v", startup) + } + return sharedHost, sharedPort +} + +func start() { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + c, err := redis.Run(ctx, image) + if err != nil { + startup = err + return + } + conn, err := c.ConnectionString(ctx) + if err != nil { + startup = err + return + } + u, err := url.Parse(conn) + if err != nil { + startup = err + return + } + h, p, err := net.SplitHostPort(u.Host) + if err != nil { + startup = err + return + } + sharedHost = h + sharedPort = p +} diff --git a/metrics.md b/metrics.md index 4be5fcf988..2b39f3f9f8 100644 --- a/metrics.md +++ b/metrics.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Metrics Documentation This document contains the description of various metrics used in Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-04-09 at 22:21:31 UTC. Note: This document does not include metrics defined in the dynsampler-go dependency, as those metrics are generated dynamically at runtime. As a result, certain metrics may be missing or incomplete in this document, but they will still be available during execution with their full names. @@ -34,11 +34,14 @@ This table includes metrics with fully defined names. 
| trace_span_count | Histogram | Dimensionless | number of spans in a trace | | collector_incoming_queue | Histogram | Dimensionless | number of spans currently in the incoming queue | | collector_peer_queue_length | Gauge | Dimensionless | number of spans in the peer queue | +| collector_peer_queue_capacity | Gauge | Dimensionless | configured maximum number of spans in the peer queue | | collector_incoming_queue_length | Gauge | Dimensionless | number of spans in the incoming queue | +| collector_incoming_queue_capacity | Gauge | Dimensionless | configured maximum number of spans in the incoming queue | | collector_peer_queue | Histogram | Dimensionless | number of spans currently in the peer queue | | collector_cache_size | Gauge | Dimensionless | number of traces currently stored in the trace cache | | collect_cache_entries | Histogram | Dimensionless | Total number of traces currently stored in the cache from all workers | | memory_heap_allocation | Gauge | Bytes | current heap allocation | +| memory_limit | Gauge | Bytes | configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage) | | span_received | Counter | Dimensionless | number of spans received by the collector | | span_processed | Counter | Dimensionless | number of spans processed by the collector | | spans_waiting | UpDown | Dimensionless | number of spans waiting to be processed by the collector | @@ -56,6 +59,7 @@ This table includes metrics with fully defined names. 
| trace_send_late_span | Counter | Dimensionless | number of spans that are sent due to late span arrival | | dropped_from_stress | Counter | Dimensionless | number of spans dropped due to stress relief | | kept_from_stress | Counter | Dimensionless | number of spans kept due to stress relief | +| events_dropped | Counter | Dimensionless | number of events dropped | | trace_kept_sample_rate | Histogram | Dimensionless | sample rate of kept traces | | trace_aggregate_sample_rate | Histogram | Dimensionless | aggregate sample rate of both kept and dropped traces | | collector_collect_loop_duration_ms | Histogram | Milliseconds | duration of the collect loop, the primary event processing goroutine | diff --git a/metrics/otel_metrics.go b/metrics/otel_metrics.go index 6d969e1046..34d2f56ad4 100644 --- a/metrics/otel_metrics.go +++ b/metrics/otel_metrics.go @@ -118,13 +118,19 @@ func (o *OTelMetrics) Start() error { hostname = hn } - res, err := resource.New(ctx, + // Build resource attributes: start with defaults, then add user-defined additional attributes + resourceOpts := []resource.Option{ resource.WithAttributes(resource.Default().Attributes()...), resource.WithAttributes(attribute.KeyValue{Key: "service.name", Value: attribute.StringValue("refinery")}), resource.WithAttributes(attribute.KeyValue{Key: "service.version", Value: attribute.StringValue(o.Version)}), resource.WithAttributes(attribute.KeyValue{Key: "host.name", Value: attribute.StringValue(hostname)}), resource.WithAttributes(attribute.KeyValue{Key: "hostname", Value: attribute.StringValue(hostname)}), - ) + } + for k, v := range cfg.AdditionalAttributes { + resourceOpts = append(resourceOpts, resource.WithAttributes(attribute.KeyValue{Key: attribute.Key(k), Value: attribute.StringValue(v)})) + } + + res, err := resource.New(ctx, resourceOpts...) 
if err != nil { return err diff --git a/metrics/otel_metrics_test.go b/metrics/otel_metrics_test.go index 96043bf86a..cc0f7ffd44 100644 --- a/metrics/otel_metrics_test.go +++ b/metrics/otel_metrics_test.go @@ -124,6 +124,46 @@ func Test_OTelMetrics_Raciness(t *testing.T) { metricdatatest.AssertEqual(t, want, got, metricdatatest.IgnoreTimestamp()) } +func Test_OTelMetrics_AdditionalAttributes(t *testing.T) { + rdr := sdkmetric.NewManualReader() + + o := &OTelMetrics{ + Logger: &logger.MockLogger{}, + Config: &config.MockConfig{ + GetOTelMetricsConfigVal: config.OTelMetricsConfig{ + AdditionalAttributes: map[string]string{ + "cluster.id": "test-cluster-123", + "environment": "staging", + }, + }, + }, + testReader: rdr, + } + + err := o.Start() + defer o.Stop() + require.NoError(t, err) + + // Emit a metric so we can collect resource data + o.Register(Metadata{Name: "test_attr", Type: Counter}) + o.Increment("test_attr") + + rm := metricdata.ResourceMetrics{} + err = rdr.Collect(t.Context(), &rm) + require.NoError(t, err) + + // Check that the additional attributes are present as resource attributes + attrs := rm.Resource.Attributes() + attrMap := make(map[string]string) + for _, attr := range attrs { + attrMap[string(attr.Key)] = attr.Value.AsString() + } + + assert.Equal(t, "test-cluster-123", attrMap["cluster.id"], "cluster.id resource attribute should be set") + assert.Equal(t, "staging", attrMap["environment"], "environment resource attribute should be set") + assert.Equal(t, "refinery", attrMap["service.name"], "service.name should still be present") +} + func Benchmark_OTelMetrics_ConcurrentAccess(b *testing.B) { o := &OTelMetrics{ Logger: &logger.NullLogger{}, diff --git a/pubsub/pubsub_test.go b/pubsub/pubsub_test.go index 333ce78b68..32dbaa6a95 100644 --- a/pubsub/pubsub_test.go +++ b/pubsub/pubsub_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/honeycombio/refinery/config" + "github.com/honeycombio/refinery/internal/redistest" 
"github.com/honeycombio/refinery/logger" "github.com/honeycombio/refinery/metrics" "github.com/honeycombio/refinery/pubsub" @@ -22,17 +23,20 @@ var types = []string{ "local", } -func newPubSub(typ string) pubsub.PubSub { +func newPubSub(t testing.TB, typ string) pubsub.PubSub { + t.Helper() var ps pubsub.PubSub m := &metrics.NullMetrics{} m.Start() tracer := noop.NewTracerProvider().Tracer("test") switch typ { case "goredis": + host, port := redistest.Endpoint(t) ps = &pubsub.GoRedisPubSub{ Config: &config.MockConfig{ GetRedisPeerManagementVal: config.RedisPeerManagementConfig{ ClusterName: "test", + Host: host + ":" + port, }, }, Metrics: m, @@ -71,7 +75,7 @@ func TestPubSubBasics(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) l1 := &pubsubListener{} @@ -105,7 +109,7 @@ func TestPubSubMultiSubscriber(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) l1 := &pubsubListener{} l2 := &pubsubListener{} topic := ps.FormatTopic("topic") @@ -138,7 +142,7 @@ func TestPubSubMultiTopic(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) time.Sleep(500 * time.Millisecond) topics := make([]string, topicCount) listeners := make([]*pubsubListener, topicCount) @@ -190,7 +194,7 @@ func TestPubSubLatency(t *testing.T) { ctx := context.Background() for _, typ := range types { t.Run(typ, func(t *testing.T) { - ps := newPubSub(typ) + ps := newPubSub(t, typ) var count, total, tmin, tmax int64 mut := sync.Mutex{} @@ -252,7 +256,7 @@ func BenchmarkPubSub(b *testing.B) { ctx := context.Background() for _, typ := range types { b.Run(typ, func(b *testing.B) { - ps := newPubSub(typ) + ps := newPubSub(b, typ) time.Sleep(100 * time.Millisecond) li := &pubsubListener{} diff --git 
a/refinery_config.md b/refinery_config.md index fc53f1da5e..6da3c7a63f 100644 --- a/refinery_config.md +++ b/refinery_config.md @@ -158,16 +158,31 @@ Not intended or supported for customer use. This list only applies to span traffic - other Honeycomb API actions will be proxied through to the upstream API directly without modifying keys. -- Not eligible for live reload. +- Eligible for live reload. - Type: `stringarray` - Example: `your-key-goes-here` +### `ReceiveKeyIDs` + +`ReceiveKeyIDs` is a set of Honeycomb Ingest Key IDs that the proxy will treat specially. + +When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose Honeycomb ingest key ID matches an entry in this list will be accepted. +The key ID is the `id` field returned by the Honeycomb `/1/auth` endpoint; it is distinct from the full API key value. +This allows authorization based on key IDs rather than full key values, which avoids storing secret key material in the configuration file. +Both `ReceiveKeys` and `ReceiveKeyIDs` may be used simultaneously. +Note: This feature does not support legacy API keys. +Only Honeycomb Ingest Keys (which have a key ID) are compatible with this setting. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `your-key-id-goes-here` + ### `AcceptOnlyListedKeys` `AcceptOnlyListedKeys` is a boolean flag that causes events arriving with API keys not in the `ReceiveKeys` list to be rejected. -If `true`, then only traffic using the keys listed in `ReceiveKeys` is accepted. -Events arriving with API keys not in the `ReceiveKeys` list will be rejected with an HTTP `401` error. +If `true`, then only traffic using the keys listed in `ReceiveKeys` or whose key ID is listed in `ReceiveKeyIDs` is accepted. +Events arriving with API keys not in either list will be rejected with an HTTP `401` error. If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. This setting is applied **before** the `SendKey` and `SendKeyMode` settings. 
@@ -644,6 +659,22 @@ In rare circumstances, compression costs may outweigh the benefits, in which cas - Default: `gzip` - Options: `none`, `gzip` +### `AdditionalAttributes` + +`AdditionalAttributes` adds the provided attributes as resource attributes on all OpenTelemetry metrics emitted by Refinery. + +This is useful for injecting deployment-specific metadata (such as a cluster ID or environment name) into metrics so they can be filtered or grouped in the metrics backend. +Both keys and values must be strings. +When supplying via an environment variable, the value should be a string of comma-separated key-value pairs. +When supplying via the command line, the value should be a key-value pair. +If multiple key-value pairs are needed, each should be supplied via its own command line flag. +The key-value pairs must use ':' as the separator. + +- Not eligible for live reload. +- Type: `map` +- Example: `pipeline.id:'12345',rollout.id:'67890'` +- Environment variable: `REFINERY_OTEL_METRICS_ADDITIONAL_ATTRIBUTES` + ## OpenTelemetry Tracing `OTelTracing` contains configuration for Refinery's own tracing. @@ -1055,6 +1086,17 @@ A trace without a `parent_id` is assumed to be a root span. - Type: `stringarray` - Example: `trace.parent_id,parentId` +### `SpanNames` + +`SpanNames` is the list of field names to use for the span ID. + +The first field in the list that is present on a span will be used as that span's ID. +This is required for `SpanCounters` entries that set `ScopeConditions` (per-anchor subtree counting), which must resolve each span's parent ID to a span ID in the same trace. + +- Eligible for live reload. +- Type: `stringarray` +- Example: `trace.span_id,spanId` + ## gRPC Server Parameters `GRPCServerParameters` controls the parameters of the gRPC server used to receive OpenTelemetry data in gRPC format. 
diff --git a/refinery_rules.md b/refinery_rules.md index 5fcd2b9592..7cca7b1aa9 100644 --- a/refinery_rules.md +++ b/refinery_rules.md @@ -671,3 +671,50 @@ If your traces are consistent lengths and changes in trace length is a useful in - Type: `bool` +## Custom Span Count Configuration + +Defines a single custom span counter. +Each counter has a Key that names the field written to a target span and an optional list of Conditions that must all match for a span to be counted. +By default the trace-wide count is written to the root span under Key. +When ScopeConditions is set, every span matching ScopeConditions instead receives the count of matching descendant spans in its own subtree; setting RootKey alongside additionally writes the trace-wide total to the root span under RootKey. +If no root span exists when the trace is sent, root writes go to the first non-annotation span instead. + +### `Key` + +The name of the field that will be added to each target span. +Must not be empty. +When `ScopeConditions` is set, this is the field written to each anchor span; when unset (the original behavior), this is the field written to the root span. +Keys in the `meta.refinery.` namespace are reserved for Refinery's own metadata and are rejected at validation. +Keys starting with `meta.` produce a warning, because int fields with a value of `0` cannot be distinguished from a missing field on the wire — meaning zero-count anchors will appear absent to downstream queries. + +- Type: `string` + +### `RootKey` + +Only meaningful when `ScopeConditions` is set. +Setting `RootKey` opts the root span into receiving the trace-wide total, written under this field name (which is typically different from `Key` so per-anchor counts and the trace-wide total can be queried independently). +If `RootKey` is left empty on a scoped counter the root receives no write. +Ignored (with a validation warning) when `ScopeConditions` is empty — unscoped counters always write `Key` to the root. 
+Subject to the same reserved-namespace rules as `Key` and counts as a separate field for cross-counter uniqueness checks. + +- Type: `string` + +### `Conditions` + +All conditions must match for a span to be counted. +If empty, every span in the trace is counted. +Uses the same condition format as rules-based sampler conditions. +An anchor span (one matching `ScopeConditions`) is also tested against Conditions like any other span — if it matches, it counts itself. + +- Type: `objectarray` + +### `ScopeConditions` + +When set, each span satisfying all of these conditions becomes an "anchor" and receives the count of matching descendant spans in its own subtree (including the anchor span itself when it matches `Conditions`). +When omitted, the counter writes a single trace-wide total to the root span — the original SpanCounter behavior. +Set `RootKey` alongside `ScopeConditions` to additionally emit the trace-wide total on the root. +Nested anchors are not special-cased: an outer anchor's count includes the descendant subtree even if it crosses an inner anchor. +Uses the same condition format as rules-based sampler conditions; the trace-level `has-root-span` operator is rejected at validation. 
+ +- Type: `objectarray` + diff --git a/route/middleware.go b/route/middleware.go index 3b1d0081ed..6a86f47156 100644 --- a/route/middleware.go +++ b/route/middleware.go @@ -45,12 +45,16 @@ func (r *Router) apiKeyProcessor(next http.Handler) http.Handler { } keycfg := r.Config.GetAccessKeyConfig() - if err := keycfg.IsAccepted(apiKey); err != nil { + keyID := "" + if keycfg.HasKeyIDs() { + keyID = r.getKeyID(apiKey) + } + if err := keycfg.IsAccepted(apiKey, keyID); err != nil { r.handlerReturnWithError(w, ErrAuthInvalid, err) return } - replacement, err := keycfg.GetReplaceKey(apiKey) + replacement, err := keycfg.GetReplaceKey(apiKey, keyID) if err != nil { r.handlerReturnWithError(w, ErrAuthInvalid, err) return diff --git a/route/otlp_logs.go b/route/otlp_logs.go index be23128b3e..6f2e39669f 100644 --- a/route/otlp_logs.go +++ b/route/otlp_logs.go @@ -18,11 +18,15 @@ func (r *Router) postOTLPLogs(w http.ResponseWriter, req *http.Request) { ri := huskyotlp.GetRequestInfoFromHttpHeaders(req.Header) apicfg := r.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = r.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { r.handleOTLPFailureResponse(w, req, huskyotlp.OTLPError{Message: err.Error(), HTTPStatusCode: http.StatusUnauthorized}) return } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateLogsHeaders(); err != nil { switch err { @@ -79,10 +83,14 @@ func (l *LogsServer) Export(ctx context.Context, req *collectorlogs.ExportLogsSe l.router.Metrics.Increment(l.router.metricsNames.routerOtlpLogGrpc) ri := huskyotlp.GetRequestInfoFromGrpcMetadata(ctx) apicfg := l.router.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = l.router.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); 
err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateLogsHeaders(); err != nil && err != huskyotlp.ErrMissingAPIKeyHeader { return nil, huskyotlp.AsGRPCError(err) diff --git a/route/otlp_logs_test.go b/route/otlp_logs_test.go index 5a8d868033..7c97fbf16f 100644 --- a/route/otlp_logs_test.go +++ b/route/otlp_logs_test.go @@ -603,8 +603,8 @@ func TestLogsOTLPHandler(t *testing.T) { }, } { t.Run(fmt.Sprintf("ApiKey %s SendKeyMode %s SendKey %s", tt.apiKey, tt.mode, tt.sendKey), func(t *testing.T) { - router.environmentCache.addItem(tt.apiKey, "local", time.Minute) - router.environmentCache.addItem(tt.sendKey, "local", time.Minute) + router.environmentCache.addItem(tt.apiKey, authData{environment: "local"}, time.Minute) + router.environmentCache.addItem(tt.sendKey, authData{environment: "local"}, time.Minute) // HTTP request, _ := http.NewRequest("POST", "/v1/logs", bytes.NewReader(body)) diff --git a/route/otlp_trace.go b/route/otlp_trace.go index 2e8fddf177..ff1d4df5dd 100644 --- a/route/otlp_trace.go +++ b/route/otlp_trace.go @@ -26,11 +26,15 @@ func (r *Router) postOTLPTrace(w http.ResponseWriter, req *http.Request) { ri := huskyotlp.GetRequestInfoFromHttpHeaders(req.Header) apicfg := r.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = r.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { r.handleOTLPFailureResponse(w, req, huskyotlp.OTLPError{Message: err.Error(), HTTPStatusCode: http.StatusUnauthorized}) return } - keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey) + keyToUse, _ := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err := ri.ValidateTracesHeaders(); err != nil { switch err { @@ -137,7 +141,11 @@ func (t *TraceServer) ExportTraceData( // Perform final authentication check (key processing already 
done in handler) apicfg := t.router.Config.GetAccessKeyConfig() - if err := apicfg.IsAccepted(ri.ApiKey); err != nil { + keyID := "" + if apicfg.HasKeyIDs() { + keyID = t.router.getKeyID(ri.ApiKey) + } + if err := apicfg.IsAccepted(ri.ApiKey, keyID); err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } @@ -205,7 +213,11 @@ func customTraceExportHandler( // Handle SendKeyMode logic before validation, similar to HTTP handler apicfg := traceServer.router.Config.GetAccessKeyConfig() - keyToUse, err := apicfg.GetReplaceKey(ri.ApiKey) + keyID := "" + if apicfg.HasKeyIDs() { + keyID = traceServer.router.getKeyID(ri.ApiKey) + } + keyToUse, err := apicfg.GetReplaceKey(ri.ApiKey, keyID) if err != nil { return nil, status.Error(codes.Unauthenticated, err.Error()) } diff --git a/route/otlp_trace_test.go b/route/otlp_trace_test.go index a57b36ac64..dd5653a986 100644 --- a/route/otlp_trace_test.go +++ b/route/otlp_trace_test.go @@ -505,7 +505,7 @@ func TestOTLPHandler(t *testing.T) { apiKey := "my-api-key" // add cached environment lookup - router.environmentCache.addItem(apiKey, "local", time.Minute) + router.environmentCache.addItem(apiKey, authData{environment: "local"}, time.Minute) req := &collectortrace.ExportTraceServiceRequest{ ResourceSpans: []*trace.ResourceSpans{{ @@ -633,7 +633,7 @@ func TestOTLPHandler(t *testing.T) { event := events[0] // Note: GRPC clients override the user-agent header with their own value. // This is expected behavior and differs from HTTP where custom user-agents are preserved. 
- assert.Equal(t, "grpc-go/1.78.0", event.Data.MetaRefineryIncomingUserAgent) + assert.Equal(t, "grpc-go/1.80.0", event.Data.MetaRefineryIncomingUserAgent) }) t.Run("spans record incoming user agent - HTTP", func(t *testing.T) { @@ -920,8 +920,8 @@ func TestOTLPHandler(t *testing.T) { }, } { t.Run(fmt.Sprintf("ApiKey %s SendKeyMode %s SendKey %s", tt.apiKey, tt.mode, tt.sendKey), func(t *testing.T) { - router.environmentCache.addItem(tt.apiKey, "local", time.Minute) - router.environmentCache.addItem(tt.sendKey, "local", time.Minute) + router.environmentCache.addItem(tt.apiKey, authData{environment: "local"}, time.Minute) + router.environmentCache.addItem(tt.sendKey, authData{environment: "local"}, time.Minute) // HTTP request, _ := http.NewRequest("POST", "/v1/traces", bytes.NewReader(body)) diff --git a/route/route.go b/route/route.go index 2f0c436a26..cf1a0d6a7e 100644 --- a/route/route.go +++ b/route/route.go @@ -994,18 +994,32 @@ func getFirstValueFromMetadata(key string, md metadata.MD) string { return "" } +// authData holds the information retrieved from the Honeycomb /1/auth endpoint +// and stored in the environment cache. +type authData struct { + environment string + keyID string +} + type environmentCache struct { mutex sync.RWMutex items map[string]*cacheItem ttl time.Duration - getFn func(string) (string, error) + getFn func(string) (authData, error) } +// SetEnvironmentCache replaces the environment cache with a new one using the +// provided TTL and lookup function. The lookup function returns only the +// environment name, and the key ID will be empty in the cached authData. +// This method exists for backward compatibility with tests. 
func (r *Router) SetEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) { - r.environmentCache = newEnvironmentCache(ttl, getFn) + r.environmentCache = newEnvironmentCache(ttl, func(key string) (authData, error) { + env, err := getFn(key) + return authData{environment: env}, err + }) } -func newEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) *environmentCache { +func newEnvironmentCache(ttl time.Duration, getFn func(string) (authData, error)) *environmentCache { return &environmentCache{ items: make(map[string]*cacheItem), ttl: ttl, @@ -1015,13 +1029,13 @@ func newEnvironmentCache(ttl time.Duration, getFn func(string) (string, error)) type cacheItem struct { expiresAt time.Time - value string + value authData } // get queries the cached items, returning cache hits that have not expired. // Cache missed use the configured getFn to populate the cache. -func (c *environmentCache) get(key string) (string, error) { - var val string +func (c *environmentCache) get(key string) (authData, error) { + var val authData // get read lock so that we don't attempt to read from the map // while another routine has a write lock and is actively writing // to the map. @@ -1032,7 +1046,7 @@ func (c *environmentCache) get(key string) (string, error) { } } c.mutex.RUnlock() - if val != "" { + if val.environment != "" { return val, nil } @@ -1051,7 +1065,7 @@ func (c *environmentCache) get(key string) (string, error) { val, err := c.getFn(key) if err != nil { - return "", err + return authData{}, err } c.addItem(key, val, c.ttl) @@ -1060,7 +1074,7 @@ func (c *environmentCache) get(key string) (string, error) { // addItem create a new cache entry in the environment cache. 
// This is not thread-safe, and should only be used in tests -func (c *environmentCache) addItem(key string, value string, ttl time.Duration) { +func (c *environmentCache) addItem(key string, value authData, ttl time.Duration) { c.items[key] = &cacheItem{ expiresAt: time.Now().Add(ttl), value: value, @@ -1080,6 +1094,7 @@ type AuthInfo struct { APIKeyAccess map[string]bool `json:"api_key_access"` Team TeamInfo `json:"team"` Environment EnvironmentInfo `json:"environment"` + ID string `json:"id"` } func (r *Router) getEnvironmentName(apiKey string) (string, error) { @@ -1087,24 +1102,36 @@ func (r *Router) getEnvironmentName(apiKey string) (string, error) { return "", nil } - env, err := r.environmentCache.get(apiKey) + data, err := r.environmentCache.get(apiKey) if err != nil { return "", err } - return env, nil + return data.environment, nil } -func (r *Router) lookupEnvironment(apiKey string) (string, error) { +// getKeyID returns the Honeycomb ingest key ID associated with the given API +// key. It uses the environment cache, so no additional API call is made if the +// key has already been looked up. Returns an empty string for legacy keys, +// blank keys, or if the lookup fails. +func (r *Router) getKeyID(apiKey string) string { + if apiKey == "" || config.IsLegacyAPIKey(apiKey) { + return "" + } + data, _ := r.environmentCache.get(apiKey) + return data.keyID +} + +func (r *Router) lookupEnvironment(apiKey string) (authData, error) { apiEndpoint := r.Config.GetHoneycombAPI() authURL, err := url.Parse(apiEndpoint) if err != nil { - return "", fmt.Errorf("failed to parse Honeycomb API URL config value. %w", err) + return authData{}, fmt.Errorf("failed to parse Honeycomb API URL config value. %w", err) } authURL.Path = "/1/auth" req, err := http.NewRequest("GET", authURL.String(), nil) if err != nil { - return "", fmt.Errorf("failed to create AuthInfo request. %w", err) + return authData{}, fmt.Errorf("failed to create AuthInfo request. 
%w", err) } req.Header.Set("x-Honeycomb-team", apiKey) @@ -1112,23 +1139,26 @@ func (r *Router) lookupEnvironment(apiKey string) (string, error) { r.Logger.Debug().WithString("endpoint", authURL.String()).Logf("Attempting to get environment name using API key") resp, err := r.proxyClient.Do(req) if err != nil { - return "", fmt.Errorf("failed sending AuthInfo request to Honeycomb API. %w", err) + return authData{}, fmt.Errorf("failed sending AuthInfo request to Honeycomb API. %w", err) } defer resp.Body.Close() switch { case resp.StatusCode == http.StatusUnauthorized: - return "", fmt.Errorf("received 401 response for AuthInfo request from Honeycomb API - check your API key") + return authData{}, fmt.Errorf("received 401 response for AuthInfo request from Honeycomb API - check your API key") case resp.StatusCode > 299: - return "", fmt.Errorf("received %d response for AuthInfo request from Honeycomb API", resp.StatusCode) + return authData{}, fmt.Errorf("received %d response for AuthInfo request from Honeycomb API", resp.StatusCode) } authinfo := AuthInfo{} if err := json.NewDecoder(resp.Body).Decode(&authinfo); err != nil { - return "", fmt.Errorf("failed to JSON decode of AuthInfo response from Honeycomb API") + return authData{}, fmt.Errorf("failed to JSON decode of AuthInfo response from Honeycomb API") } r.Logger.Debug().WithString("environment", authinfo.Environment.Name).Logf("Got environment") - return authinfo.Environment.Name, nil + return authData{ + environment: authinfo.Environment.Name, + keyID: authinfo.ID, + }, nil } func (r *Router) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { diff --git a/route/route_test.go b/route/route_test.go index 81781a4ec6..1e47db3beb 100644 --- a/route/route_test.go +++ b/route/route_test.go @@ -741,53 +741,53 @@ func TestDependencyInjection(t *testing.T) { func TestEnvironmentCache(t *testing.T) { t.Run("calls getFn on cache miss", func(t *testing.T) { - 
cache := newEnvironmentCache(time.Second, func(key string) (string, error) { + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { if key != "key" { t.Errorf("expected %s - got %s", "key", key) } - return "test", nil + return authData{environment: "test"}, nil }) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "test" { - t.Errorf("expected %s - got %s", "test", val) + if val.environment != "test" { + t.Errorf("expected %s - got %s", "test", val.environment) } }) t.Run("does not call getFn on cache hit", func(t *testing.T) { - cache := newEnvironmentCache(time.Second, func(key string) (string, error) { + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { t.Errorf("should not have called getFn") - return "", nil + return authData{}, nil }) - cache.addItem("key", "value", time.Second) + cache.addItem("key", authData{environment: "value"}, time.Second) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "value" { - t.Errorf("expected %s - got %s", "value", val) + if val.environment != "value" { + t.Errorf("expected %s - got %s", "value", val.environment) } }) t.Run("ignores expired items", func(t *testing.T) { called := false - cache := newEnvironmentCache(time.Millisecond, func(key string) (string, error) { + cache := newEnvironmentCache(time.Millisecond, func(key string) (authData, error) { called = true - return "value", nil + return authData{environment: "value"}, nil }) - cache.addItem("key", "value", time.Millisecond) + cache.addItem("key", authData{environment: "value"}, time.Millisecond) time.Sleep(time.Millisecond * 5) val, err := cache.get("key") if err != nil { t.Errorf("got error calling getOrSet - %e", err) } - if val != "value" { - t.Errorf("expected %s - got %s", "value", val) + if val.environment != "value" { + t.Errorf("expected %s - got %s", "value", val.environment) } if !called { 
t.Errorf("expected to call getFn") @@ -796,8 +796,8 @@ func TestEnvironmentCache(t *testing.T) { t.Run("errors returned from getFn are propagated", func(t *testing.T) { expectedErr := errors.New("error") - cache := newEnvironmentCache(time.Second, func(key string) (string, error) { - return "", expectedErr + cache := newEnvironmentCache(time.Second, func(key string) (authData, error) { + return authData{}, expectedErr }) _, err := cache.get("key") @@ -1206,7 +1206,7 @@ func newBatchRouter(t testing.TB) *Router { Sharder: mockSharder, routerType: types.RouterTypeIncoming, iopLogger: iopLogger{Logger: &logger.NullLogger{}, incomingOrPeer: types.RouterTypeIncoming.String()}, - environmentCache: newEnvironmentCache(time.Second, func(key string) (string, error) { return "test", nil }), + environmentCache: newEnvironmentCache(time.Second, func(key string) (authData, error) { return authData{environment: "test"}, nil }), Tracer: noop.Tracer{}, } var err error diff --git a/rules.md b/rules.md index ee6023bc0e..dcf40e3b46 100644 --- a/rules.md +++ b/rules.md @@ -3,7 +3,7 @@ # Honeycomb Refinery Rules Documentation This is the documentation for the rules configuration for Honeycomb's Refinery. -It was automatically generated on 2026-02-25 at 20:49:27 UTC. +It was automatically generated on 2026-06-01 at 19:01:21 UTC. ## The Rules file @@ -55,6 +55,7 @@ The remainder of this document describes the samplers that can be used within th - [Rules for Rules-based Samplers](#rules-for-rules-based-samplers) - [Conditions for the Rules in Rules-based Samplers](#conditions-for-the-rules-in-rules-based-samplers) - [Total Throughput Sampler](#total-throughput-sampler) +- [Custom Span Count Configuration](#custom-span-count-configuration) --- ## Deterministic Sampler @@ -715,3 +716,53 @@ If your traces are consistent lengths and changes in trace length is a useful in Type: `bool` +--- +## Custom Span Count Configuration + +### Name: `SpanCounters` + +Defines a single custom span counter. 
+Each counter has a Key that names the field written to a target span and an optional list of Conditions that must all match for a span to be counted. +By default the trace-wide count is written to the root span under Key. +When ScopeConditions is set, every span matching ScopeConditions instead receives the count of matching descendant spans in its own subtree; setting RootKey alongside additionally writes the trace-wide total to the root span under RootKey. +If no root span exists when the trace is sent, root writes go to the first non-annotation span instead. + +### `Key` + +The name of the field that will be added to each target span. +Must not be empty. +When `ScopeConditions` is set, this is the field written to each anchor span; when unset (the original behavior), this is the field written to the root span. +Keys in the `meta.refinery.` namespace are reserved for Refinery's own metadata and are rejected at validation. +Keys starting with `meta.` produce a warning, because int fields with a value of `0` cannot be distinguished from a missing field on the wire — meaning zero-count anchors will appear absent to downstream queries. + +Type: `string` + +### `RootKey` + +Only meaningful when `ScopeConditions` is set. +Setting `RootKey` opts the root span into receiving the trace-wide total, written under this field name (which is typically different from `Key` so per-anchor counts and the trace-wide total can be queried independently). +If `RootKey` is left empty on a scoped counter the root receives no write. +Ignored (with a validation warning) when `ScopeConditions` is empty — unscoped counters always write `Key` to the root. +Subject to the same reserved-namespace rules as `Key` and counts as a separate field for cross-counter uniqueness checks. + +Type: `string` + +### `Conditions` + +All conditions must match for a span to be counted. +If empty, every span in the trace is counted. +Uses the same condition format as rules-based sampler conditions. 
+An anchor span (one matching `ScopeConditions`) is also tested against Conditions like any other span — if it matches, it counts itself. + +Type: `objectarray` + +### `ScopeConditions` + +When set, each span satisfying all of these conditions becomes an "anchor" and receives the count of matching descendant spans in its own subtree (including the anchor span itself when it matches `Conditions`). +When omitted, the counter writes a single trace-wide total to the root span — the original SpanCounter behavior. +Set `RootKey` alongside `ScopeConditions` to additionally emit the trace-wide total on the root. +Nested anchors are not special-cased: an outer anchor's count includes the descendant subtree even if it crosses an inner anchor. +Uses the same condition format as rules-based sampler conditions; the trace-level `has-root-span` operator is rejected at validation. + +Type: `objectarray` + diff --git a/sample/dynamic.go b/sample/dynamic.go index 7bb9022a81..7be6816e5a 100644 --- a/sample/dynamic.go +++ b/sample/dynamic.go @@ -41,7 +41,7 @@ type DynamicSampler struct { keyFields, nonRootFields []string dynsampler dynsampler.Sampler - metricsRecorder dynsamplerMetricsRecorder + metricsRecorder *dynsamplerMetricsRecorder } func (d *DynamicSampler) Start() error { @@ -56,12 +56,13 @@ func (d *DynamicSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics from the dynsampler-go package - d.metricsRecorder = dynsamplerMetricsRecorder{ - met: d.Metrics, - prefix: "dynamic", + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + met: d.Metrics, + prefix: "dynamic", + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/dynamic_ema.go b/sample/dynamic_ema.go index 8372923c09..0dd19bf811 100644 --- a/sample/dynamic_ema.go 
+++ b/sample/dynamic_ema.go @@ -56,12 +56,13 @@ func (d *EMADynamicSampler) Start() error { d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "emadynamic", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "emadynamic", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/ema_throughput.go b/sample/ema_throughput.go index e881933e2c..7af96e9bdb 100644 --- a/sample/ema_throughput.go +++ b/sample/ema_throughput.go @@ -58,12 +58,13 @@ func (d *EMAThroughputSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "emathroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "emathroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/rules.go b/sample/rules.go index 3f907f3ad1..cf25d547dd 100644 --- a/sample/rules.go +++ b/sample/rules.go @@ -308,158 +308,8 @@ func extractValueFromSpan( return nil, false, false } -// This only gets called when we're using one of the basic operators, and -// there is no datatype specified (meaning that the Matches function has not -// been set). In this case, we need to do some type conversion and comparison -// to determine whether the condition matches the value. +// conditionMatchesValue delegates to config.ConditionMatchesValue. 
+// It is called when condition.Matches is nil (Datatype was not specified). func conditionMatchesValue(condition *config.RulesBasedSamplerCondition, value interface{}, exists bool) bool { - var match bool - switch exists { - case true: - switch condition.Operator { - case config.Exists: - match = exists - case config.NEQ: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison != equal - } - case config.EQ: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == equal - } - case config.GT: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == more - } - case config.GTE: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == more || comparison == equal - } - case config.LT: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == less - } - case config.LTE: - if comparison, ok := compare(value, condition.Value); ok { - match = comparison == less || comparison == equal - } - } - case false: - switch condition.Operator { - case config.NotExists: - match = !exists - } - } - return match -} - -const ( - less = -1 - equal = 0 - more = 1 -) - -func compare(a, b interface{}) (int, bool) { - // a is the tracing data field value. This can be: float64, int64, bool, or string - // b is the Rule condition value. 
This can be: float64, int64, int, bool, or string - // Note: in YAML config parsing, the Value may be returned as int - // When comparing numeric values, we need to check across the 3 types: float64, int64, and int - - if a == nil { - if b == nil { - return equal, true - } - - return less, true - } - - if b == nil { - return more, true - } - - switch at := a.(type) { - case int64: - switch bt := b.(type) { - case int: - i := int(at) - switch { - case i < bt: - return less, true - case i > bt: - return more, true - default: - return equal, true - } - case int64: - switch { - case at < bt: - return less, true - case at > bt: - return more, true - default: - return equal, true - } - case float64: - f := float64(at) - switch { - case f < bt: - return less, true - case f > bt: - return more, true - default: - return equal, true - } - } - case float64: - switch bt := b.(type) { - case int: - f := float64(bt) - switch { - case at < f: - return less, true - case at > f: - return more, true - default: - return equal, true - } - case int64: - f := float64(bt) - switch { - case at < f: - return less, true - case at > f: - return more, true - default: - return equal, true - } - case float64: - switch { - case at < bt: - return less, true - case at > bt: - return more, true - default: - return equal, true - } - } - case bool: - switch bt := b.(type) { - case bool: - switch { - case !at && bt: - return less, true - case at && !bt: - return more, true - default: - return equal, true - } - } - case string: - switch bt := b.(type) { - case string: - return strings.Compare(at, bt), true - } - } - - return equal, false + return config.ConditionMatchesValue(condition, value, exists) } diff --git a/sample/sample.go b/sample/sample.go index e90b172e4b..eff69f3abd 100644 --- a/sample/sample.go +++ b/sample/sample.go @@ -3,6 +3,7 @@ package sample import ( "fmt" "os" + "slices" "strings" "sync" @@ -24,6 +25,15 @@ type CanSetGoalThroughputPerSec interface { SetGoalThroughputPerSec(int) } 
+type sharedDynsamplerEntry struct { + dynsampler any + recorder *dynsamplerMetricsRecorder +} + +var samplerFactoryMetrics = []metrics.Metadata{ + {Name: "unique_dynsampler_count", Type: metrics.Gauge, Unit: metrics.Dimensionless, Description: "Number of unique dynsampler-go samplers created"}, +} + // SamplerFactory is used to create new samplers with common (injected) resources type SamplerFactory struct { Config config.Config `inject:""` @@ -33,8 +43,8 @@ type SamplerFactory struct { peerCount int mutex sync.Mutex - // Shared dynsampler instances to maintain global throughput tracking - sharedDynsamplers map[string]any + // Shared dynsampler instances and their metrics recorders, keyed identically to avoid N×overcounting + sharedDynsamplers map[string]sharedDynsamplerEntry // Store original GoalThroughputPerSec values for cluster size calculations. // We need this to recalculate goal throughput values when the cluster size @@ -55,8 +65,8 @@ func (s *SamplerFactory) updatePeerCounts() { } // Update goal throughput for all throughput-based dynsamplers - for dynsamplerKey, dynsamplerInstance := range s.sharedDynsamplers { - if hasThroughput, ok := dynsamplerInstance.(CanSetGoalThroughputPerSec); ok { + for dynsamplerKey, entry := range s.sharedDynsamplers { + if hasThroughput, ok := entry.dynsampler.(CanSetGoalThroughputPerSec); ok { if cfg, ok := s.goalThroughputConfigs[dynsamplerKey]; ok { // Calculate new throughput based on cluster size newThroughput := max(cfg/s.peerCount, 1) @@ -68,30 +78,46 @@ func (s *SamplerFactory) updatePeerCounts() { func (s *SamplerFactory) Start() error { s.peerCount = 1 - s.sharedDynsamplers = make(map[string]any) + s.sharedDynsamplers = make(map[string]sharedDynsamplerEntry) s.goalThroughputConfigs = make(map[string]int) if s.Peers != nil { s.Peers.RegisterUpdatedPeersCallback(s.updatePeerCounts) } + for _, metric := range samplerFactoryMetrics { + s.Metrics.Register(metric) + } return nil } -func getSharedDynsampler[ST any, CT 
any]( +func getSharedDynsamplerAndRecorder[ST dynsampler.Sampler, CT any]( s *SamplerFactory, dynsamplerKey string, + prefix string, config CT, create func(config CT) ST, -) ST { +) (ST, *dynsamplerMetricsRecorder) { s.mutex.Lock() defer s.mutex.Unlock() - var ok bool - var dynsamplerInstance ST - if dynsamplerInstance, ok = s.sharedDynsamplers[dynsamplerKey].(ST); !ok { - dynsamplerInstance = create(config) - s.sharedDynsamplers[dynsamplerKey] = dynsamplerInstance + if entry, ok := s.sharedDynsamplers[dynsamplerKey]; ok { + if existing, ok := entry.dynsampler.(ST); ok { + return existing, entry.recorder + } } - return dynsamplerInstance + dynsamplerInstance := create(config) + r := &dynsamplerMetricsRecorder{prefix: prefix, met: s.Metrics} + r.RegisterMetrics(dynsamplerInstance) + s.sharedDynsamplers[dynsamplerKey] = sharedDynsamplerEntry{dynsampler: dynsamplerInstance, recorder: r} + return dynsamplerInstance, r +} + +// makeDynsamplerKey builds a dynsampler map key with a sorted copy of fieldList so that +// configs with the same fields in different order always map to the same instance. +func makeDynsamplerKey(prefix, samplerType string, rate int64, fieldList []string) string { + sorted := make([]string, len(fieldList)) + copy(sorted, fieldList) + slices.Sort(sorted) + return fmt.Sprintf("%s:%s:%d:%v", prefix, samplerType, rate, sorted) } // createSampler creates a sampler with shared dynsamplers based on the config type. 
@@ -107,45 +133,45 @@ func (s *SamplerFactory) createSampler(c any, keyPrefix string) Sampler { case *config.DeterministicSamplerConfig: sampler = &DeterministicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics} case *config.DynamicSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:dynamic:%d:%v", keyPrefix, c.SampleRate, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForDynamicSampler) - sampler = &DynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + dynsamplerKey := makeDynsamplerKey(keyPrefix, "dynamic", c.SampleRate, c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "dynamic", c, createDynForDynamicSampler) + sampler = &DynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.EMADynamicSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:emadynamic:%d:%v", keyPrefix, c.GoalSampleRate, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForEMADynamicSampler) - sampler = &EMADynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + dynsamplerKey := makeDynsamplerKey(keyPrefix, "emadynamic", int64(c.GoalSampleRate), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "emadynamic", c, createDynForEMADynamicSampler) + sampler = &EMADynamicSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.RulesBasedSamplerConfig: sampler = &RulesBasedSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, SamplerFactory: s, samplerPrefix: keyPrefix} case *config.TotalThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:totalthroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, 
createDynForTotalThroughputSampler) + dynsamplerKey := makeDynsamplerKey(keyPrefix, "totalthroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "totalthroughput", c, createDynForTotalThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &TotalThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &TotalThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.EMAThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:emathroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForEMAThroughputSampler) + dynsamplerKey := makeDynsamplerKey(keyPrefix, "emathroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "emathroughput", c, createDynForEMAThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &EMAThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &EMAThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} case *config.WindowedThroughputSamplerConfig: - dynsamplerKey := fmt.Sprintf("%s:windowedthroughput:%d:%v", keyPrefix, c.GoalThroughputPerSec, c.FieldList) - dynsamplerInstance := getSharedDynsampler(s, dynsamplerKey, c, createDynForWindowedThroughputSampler) + 
dynsamplerKey := makeDynsamplerKey(keyPrefix, "windowedthroughput", int64(c.GoalThroughputPerSec), c.FieldList) + dynsamplerInstance, recorder := getSharedDynsamplerAndRecorder(s, dynsamplerKey, "windowedthroughput", c, createDynForWindowedThroughputSampler) // only track goal throughput config if we need to recalculate it later based on cluster size if c.UseClusterSize { s.mutex.Lock() s.goalThroughputConfigs[dynsamplerKey] = c.GoalThroughputPerSec s.mutex.Unlock() } - sampler = &WindowedThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance} + sampler = &WindowedThroughputSampler{Config: c, Logger: s.Logger, Metrics: s.Metrics, dynsampler: dynsamplerInstance, metricsRecorder: recorder} default: s.Logger.Error().Logf("unknown sampler type %T. Exiting.", c) os.Exit(1) @@ -161,6 +187,7 @@ func (s *SamplerFactory) createSampler(c any, keyPrefix string) Sampler { s.Logger.Debug().WithField("dataset", keyPrefix).Logf("created implementation for sampler type %T", c) // Update peer counts after creating a sampler s.updatePeerCounts() + s.Metrics.Gauge("unique_dynsampler_count", float64(len(s.sharedDynsamplers))) return sampler } @@ -211,8 +238,8 @@ func (s *SamplerFactory) ClearDynsamplers() { defer s.mutex.Unlock() // Stop all shared dynsamplers - for _, dynSampler := range s.sharedDynsamplers { - if stopper, ok := dynSampler.(interface{ Stop() }); ok { + for _, entry := range s.sharedDynsamplers { + if stopper, ok := entry.dynsampler.(interface{ Stop() }); ok { stopper.Stop() } } @@ -247,6 +274,7 @@ type internalDysamplerMetric struct { } type dynsamplerMetricsRecorder struct { + mu sync.Mutex prefix string dynPrefix string // Used for accessing metrics from dynsampler-go // Stores the last recorded internal metrics produced by dynsampler-go @@ -258,8 +286,8 @@ type dynsamplerMetricsRecorder struct { // RegisterMetrics registers the metrics that will be recorded by this package. 
// It initializes the necessary metrics and prepares them for recording. // It MUST be called before any calls to RecordMetrics. +// This function is not concurrency safe. func (d *dynsamplerMetricsRecorder) RegisterMetrics(sampler dynsampler.Sampler) { - // Register statistics this package will produce d.dynPrefix = d.prefix + "_" d.lastMetrics = make(map[string]internalDysamplerMetric) dynInternalMetrics := sampler.GetMetrics(d.dynPrefix) @@ -274,6 +302,7 @@ func (d *dynsamplerMetricsRecorder) RegisterMetrics(sampler dynsampler.Sampler) } func (d *dynsamplerMetricsRecorder) RecordMetrics(sampler dynsampler.Sampler, kept bool, rate uint, numTraceKey int) { + d.mu.Lock() for name, val := range sampler.GetMetrics(d.dynPrefix) { m := d.lastMetrics[name] switch m.metricType { @@ -286,6 +315,7 @@ func (d *dynsamplerMetricsRecorder) RecordMetrics(sampler dynsampler.Sampler, ke d.met.Gauge(name, float64(val)) } } + d.mu.Unlock() if kept { d.met.Increment(d.metricNames.numKept) diff --git a/sample/sample_test.go b/sample/sample_test.go index 7afbca5ad8..5124e06f91 100644 --- a/sample/sample_test.go +++ b/sample/sample_test.go @@ -462,6 +462,73 @@ func TestDifferentDatasetsShouldNotShareDynsampler(t *testing.T) { assert.Equal(t, prodImpl.dynsampler.GoalThroughputPerSec, dogfoodImpl.dynsampler.GoalThroughputPerSec) } +// TestFieldListOrderDoesNotAffectDynsamplerSharing verifies that two sampler configs with identical +// FieldList entries in different order share the same dynsampler instance. 
+func TestFieldListOrderDoesNotAffectDynsamplerSharing(t *testing.T) { + fields1 := []string{"service.name", "http.method", "status.code"} + fields2 := []string{"status.code", "service.name", "http.method"} + + newFactory := func() *SamplerFactory { + factory := &SamplerFactory{ + Logger: &logger.NullLogger{}, + Metrics: &metrics.NullMetrics{}, + } + factory.Start() + t.Cleanup(factory.Stop) + return factory + } + + t.Run("DynamicSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.DynamicSamplerConfig{SampleRate: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.DynamicSamplerConfig{SampleRate: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*DynamicSampler).dynsampler, s2.(*DynamicSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("EMADynamicSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.EMADynamicSamplerConfig{GoalSampleRate: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.EMADynamicSamplerConfig{GoalSampleRate: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*EMADynamicSampler).dynsampler, s2.(*EMADynamicSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("TotalThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.TotalThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.TotalThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*TotalThroughputSampler).dynsampler, s2.(*TotalThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("EMAThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.EMAThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, 
"env") + s2 := f.createSampler(&config.EMAThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*EMAThroughputSampler).dynsampler, s2.(*EMAThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) + + t.Run("WindowedThroughputSampler", func(t *testing.T) { + f := newFactory() + s1 := f.createSampler(&config.WindowedThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields1}, "env") + s2 := f.createSampler(&config.WindowedThroughputSamplerConfig{GoalThroughputPerSec: 10, FieldList: fields2}, "env") + require.NotNil(t, s1) + require.NotNil(t, s2) + assert.Same(t, s1.(*WindowedThroughputSampler).dynsampler, s2.(*WindowedThroughputSampler).dynsampler) + assert.Len(t, f.sharedDynsamplers, 1) + }) +} + // TestClusterSizeUpdatesSamplers verifies that the SamplerFactory properly handles dynamic peer updates // and their impact on throughput-based sampling behavior. func TestClusterSizeUpdatesSamplers(t *testing.T) { diff --git a/sample/totalthroughput.go b/sample/totalthroughput.go index c69294a835..686d176eae 100644 --- a/sample/totalthroughput.go +++ b/sample/totalthroughput.go @@ -57,12 +57,13 @@ func (d *TotalThroughputSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "totalthroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "totalthroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/sample/trace_key.go b/sample/trace_key.go index 66c42b2eb4..44f617532a 100644 --- a/sample/trace_key.go +++ b/sample/trace_key.go @@ -28,10 +28,13 @@ type traceKey 
struct { func newTraceKey(fields []string, useTraceLength bool) *traceKey { // always put the field list in sorted order for easier comparison - sort.Strings(fields) - rootOnlyFields := make([]string, 0, len(fields)/2) - nonRootFields := make([]string, 0, len(fields)/2) - for _, field := range fields { + copiedFields := make([]string, len(fields)) + copy(copiedFields, fields) + sort.Strings(copiedFields) + + rootOnlyFields := make([]string, 0, len(copiedFields)/2) + nonRootFields := make([]string, 0, len(copiedFields)/2) + for _, field := range copiedFields { if strings.HasPrefix(field, config.RootPrefix) { rootOnlyFields = append(rootOnlyFields, field[len(config.RootPrefix):]) continue diff --git a/sample/windowed_throughput.go b/sample/windowed_throughput.go index adaf019be4..11a35e49d0 100644 --- a/sample/windowed_throughput.go +++ b/sample/windowed_throughput.go @@ -54,12 +54,13 @@ func (d *WindowedThroughputSampler) Start() error { d.key = newTraceKey(d.Config.FieldList, d.Config.UseTraceLength) d.keyFields, d.nonRootFields = config.GetKeyFields(d.Config.GetSamplingFields()) - // Register statistics this package will produce - d.metricsRecorder = &dynsamplerMetricsRecorder{ - prefix: "windowedthroughput", - met: d.Metrics, + if d.metricsRecorder == nil { + d.metricsRecorder = &dynsamplerMetricsRecorder{ + prefix: "windowedthroughput", + met: d.Metrics, + } + d.metricsRecorder.RegisterMetrics(d.dynsampler) } - d.metricsRecorder.RegisterMetrics(d.dynsampler) return nil } diff --git a/tools/convert/configDataNames.txt b/tools/convert/configDataNames.txt index 793d4bb1e7..56b980702d 100644 --- a/tools/convert/configDataNames.txt +++ b/tools/convert/configDataNames.txt @@ -1,5 +1,5 @@ # Names of groups and fields in the new config file format. -# Automatically generated on 2026-02-25 at 20:49:25 UTC. +# Automatically generated on 2026-06-01 at 19:01:21 UTC. 
General: - ConfigurationVersion @@ -34,6 +34,8 @@ OpAMP: AccessKeys: - ReceiveKeys (originally APIKeys) + - ReceiveKeyIDs + - AcceptOnlyListedKeys - SendKey @@ -136,6 +138,8 @@ OTelMetrics: - Compression + - AdditionalAttributes + OTelTracing: - Enabled @@ -246,6 +250,8 @@ IDFields: - ParentNames + - SpanNames + GRPCServerParameters: - Enabled diff --git a/tools/convert/metricsMeta.yaml b/tools/convert/metricsMeta.yaml index 67fadc5de3..032c6d9025 100644 --- a/tools/convert/metricsMeta.yaml +++ b/tools/convert/metricsMeta.yaml @@ -87,10 +87,18 @@ complete: type: Gauge unit: Dimensionless description: number of spans in the peer queue + - name: collector_peer_queue_capacity + type: Gauge + unit: Dimensionless + description: configured maximum number of spans in the peer queue - name: collector_incoming_queue_length type: Gauge unit: Dimensionless description: number of spans in the incoming queue + - name: collector_incoming_queue_capacity + type: Gauge + unit: Dimensionless + description: configured maximum number of spans in the incoming queue - name: collector_peer_queue type: Histogram unit: Dimensionless @@ -107,6 +115,10 @@ complete: type: Gauge unit: Bytes description: current heap allocation + - name: memory_limit + type: Gauge + unit: Bytes + description: configured maximum memory allocation for the collector (derived from MaxAlloc or AvailableMemory * MaxMemoryPercentage) - name: span_received type: Counter unit: Dimensionless @@ -175,6 +187,10 @@ complete: type: Counter unit: Dimensionless description: number of spans kept due to stress relief + - name: events_dropped + type: Counter + unit: Dimensionless + description: number of events dropped - name: trace_kept_sample_rate type: Histogram unit: Dimensionless diff --git a/tools/convert/minimal_config.yaml b/tools/convert/minimal_config.yaml index eb49635629..6e43f8df48 100644 --- a/tools/convert/minimal_config.yaml +++ b/tools/convert/minimal_config.yaml @@ -1,5 +1,5 @@ # sample uncommented config file 
containing all possible fields -# automatically generated on 2026-02-25 at 20:49:25 UTC +# automatically generated on 2026-06-01 at 19:01:21 UTC General: ConfigurationVersion: 2 MinRefineryVersion: "v2.0" @@ -18,6 +18,9 @@ AccessKeys: ReceiveKeys: - "your-key-goes-here" + ReceiveKeyIDs: + - "your-key-id-goes-here" + AcceptOnlyListedKeys: false SendKey: SetThisToAHoneycombKey SendKeyMode: none @@ -68,6 +71,10 @@ OTelMetrics: Dataset: "Refinery Metrics" ReportingInterval: 30s Compression: gzip + AdditionalAttributes: + "pipeline.id": "'12345'" + "rollout.id": "'67890'" + OTelTracing: Enabled: false APIHost: "https://api.honeycomb.io" @@ -121,6 +128,10 @@ IDFields: - "trace.parent_id" - parentId + SpanNames: + - "trace.span_id" + - spanId + GRPCServerParameters: Enabled: true ListenAddr: "" diff --git a/tools/convert/templates/configV2.tmpl b/tools/convert/templates/configV2.tmpl index 7f91a2f2a5..b5d937fe5f 100644 --- a/tools/convert/templates/configV2.tmpl +++ b/tools/convert/templates/configV2.tmpl @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created {{ now }} from {{ .Input }} using a template generated on 2026-02-25 at 20:49:24 UTC +# created {{ now }} from {{ .Input }} using a template generated on 2026-06-01 at 19:01:21 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -165,15 +165,33 @@ AccessKeys: ## will be proxied through to the upstream API directly without modifying ## keys. ## - ## Not eligible for live reload. + ## Eligible for live reload. {{ renderStringarray .Data "ReceiveKeys" "APIKeys" "your-key-goes-here" }} + ## ReceiveKeyIDs is a set of Honeycomb Ingest Key IDs that the proxy will + ## treat specially. + ## + ## When `AcceptOnlyListedKeys` is `true`, traffic using an API key whose + ## Honeycomb ingest key ID matches an entry in this list will be + ## accepted. 
The key ID is the `id` field returned by the Honeycomb + ## `/1/auth` endpoint; it is distinct from the full API key value. + ## This allows authorization based on key IDs rather than full key + ## values, which avoids storing secret key material in the configuration + ## file. Both `ReceiveKeys` and `ReceiveKeyIDs` may be used + ## simultaneously. + ## Note: This feature does not support legacy API keys. Only Honeycomb + ## Ingest Keys (which have a key ID) are compatible with this setting. + ## + ## Eligible for live reload. + {{ renderStringarray .Data "ReceiveKeyIDs" "ReceiveKeyIDs" "your-key-id-goes-here" }} + ## AcceptOnlyListedKeys is a boolean flag that causes events arriving ## with API keys not in the `ReceiveKeys` list to be rejected. ## - ## If `true`, then only traffic using the keys listed in `ReceiveKeys` is - ## accepted. Events arriving with API keys not in the `ReceiveKeys` list - ## will be rejected with an HTTP `401` error. + ## If `true`, then only traffic using the keys listed in `ReceiveKeys` or + ## whose key ID is listed in `ReceiveKeyIDs` is accepted. Events arriving + ## with API keys not in either list will be rejected with an HTTP `401` + ## error. ## If `false`, then all traffic is accepted and `ReceiveKeys` is ignored. ## This setting is applied **before** the `SendKey` and `SendKeyMode` ## settings. @@ -690,6 +708,22 @@ OTelMetrics: ## Options: none gzip {{ choice .Data "Compression" "Compression" (makeSlice "none" "gzip") "gzip" }} + ## AdditionalAttributes adds the provided attributes as resource + ## attributes on all OpenTelemetry metrics emitted by Refinery. + ## + ## This is useful for injecting deployment-specific metadata (such as a + ## cluster ID or environment name) into metrics so they can be filtered + ## or grouped in the metrics backend. Both keys and values must be + ## strings. + ## When supplying via an environment variable, the value should be a + ## string of comma-separated key-value pairs.
When supplying via the + ## command line, the value should be a key value pair. If multiple + ## key-value pairs are needed, each should be supplied via its own + ## command line flag. The key-value pairs must use ':' as the separator. + ## + ## Not eligible for live reload. + {{ renderMap .Data "AdditionalAttributes" "AdditionalAttributes" "pipeline.id:'12345',rollout.id:'67890'" }} + ########################### ## OpenTelemetry Tracing ## ########################### @@ -1131,6 +1165,16 @@ IDFields: ## Eligible for live reload. {{ renderStringarray .Data "ParentNames" "ParentNames" "trace.parent_id,parentId" }} + ## SpanNames is the list of field names to use for the span ID. + ## + ## The first field in the list that is present on a span will be used as + ## that span's ID. This is required for `SpanCounters` entries that set + ## `ScopeConditions` (per-anchor subtree counting), which must resolve + ## each span's parent ID to a span ID in the same trace. + ## + ## Eligible for live reload. 
+ {{ renderStringarray .Data "SpanNames" "SpanNames" "trace.span_id,spanId" }} + ############################ ## gRPC Server Parameters ## ############################ diff --git a/transmit/direct_transmit.go b/transmit/direct_transmit.go index 46d36ec572..607250bab1 100644 --- a/transmit/direct_transmit.go +++ b/transmit/direct_transmit.go @@ -316,24 +316,19 @@ func (d *DirectTransmission) Stop() error { return nil } -// handleBatchFailure handles metrics updates when the entire batch fails -func (d *DirectTransmission) handleBatchFailure(batch []*types.Event) { - d.Metrics.Increment(d.metricKeys.counterSendErrors) - for range batch { - d.Metrics.Down(d.metricKeys.updownQueuedItems) - } -} - -// handleEventError logs an error and updates metrics for a single event -func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte) { +// handleError logs an error with common fields and custom message +func (d *DirectTransmission) handleError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte, logMessage string) { log := d.Logger.Error().WithFields(map[string]any{ - "status_code": statusCode, "api_host": ev.APIHost, "dataset": ev.Dataset, "environment": ev.Environment, "roundtrip_usec": queueTime, }) + if statusCode > 0 { + log = log.WithField("status_code", statusCode) + } + if errorMsg != "" { log = log.WithField("error", errorMsg) } @@ -350,7 +345,30 @@ func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, q } } - log.Logf("error when sending event") + log.Logf(logMessage) +} + +// handleBatchFailure handles metrics updates when the entire batch fails +func (d *DirectTransmission) handleBatchFailure(batch []*types.Event, errorMsg string, logMessage string) { + d.Metrics.Increment(d.metricKeys.counterSendErrors) + now := time.Now().UnixMicro() + if len(batch) > 0 { + queueTime := now - batch[0].EnqueuedUnixMicro + d.handleError(batch[0], 
0, queueTime, errorMsg, nil, logMessage) + } + + for _, ev := range batch { + d.Metrics.Histogram(d.metricKeys.histogramQueueTime, float64(now-ev.EnqueuedUnixMicro)) + d.Metrics.Down(d.metricKeys.updownQueuedItems) + } +} + +// handleEventError logs an error and updates metrics for a single event +func (d *DirectTransmission) handleEventError(ev *types.Event, statusCode int, queueTime int64, errorMsg string, responseBody []byte, logMessage string) { + if logMessage == "" { + logMessage = "error when sending event" + } + d.handleError(ev, statusCode, queueTime, errorMsg, responseBody, logMessage) d.Metrics.Increment(d.metricKeys.counterResponseErrors) d.Metrics.Down(d.metricKeys.updownQueuedItems) d.Metrics.Histogram(d.metricKeys.histogramQueueTime, float64(queueTime)) @@ -407,9 +425,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { if err != nil { // Skip this message and remove it from the list, so we don't // try to account for it again. - d.Logger.Error().WithField("err", err.Error()).Logf("failed to marshal event") - d.Metrics.Down(d.metricKeys.updownQueuedItems) - d.Metrics.Increment(d.metricKeys.counterResponseErrors) + d.handleEventError(wholeBatch[i], 0, time.Now().UnixMicro()-wholeBatch[i].EnqueuedUnixMicro, err.Error(), nil, "failed to marshal event") continue } if len(newPacked) > apiMaxBatchSize { @@ -440,8 +456,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { apiURL, err := buildRequestURL(apiHost, dataset) if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("failed to create request URL") - d.handleBatchFailure(subBatch) + d.handleBatchFailure(subBatch, err.Error(), "failed to create request URL") continue } @@ -471,8 +486,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { req, err = http.NewRequest("POST", apiURL, readerPtr) if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("failed to create request") - d.handleBatchFailure(subBatch) + 
d.handleBatchFailure(subBatch, err.Error(), "failed to create request") break } @@ -523,13 +537,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { dequeuedAt := d.Clock.Now() if err != nil { - d.Logger.Error().WithField("err", err.Error()).Logf("http POST failed") - - // Network/connection error - affects all events in batch - for _, ev := range subBatch { - queueTime := dequeuedAt.UnixMicro() - ev.EnqueuedUnixMicro - d.handleEventError(ev, 0, queueTime, err.Error(), nil) - } + d.handleBatchFailure(subBatch, err.Error(), "") continue } @@ -544,15 +552,18 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { if resp.Header.Get("Content-Type") == "application/msgpack" { err = msgpack.NewDecoder(resp.Body).Decode(&batchResponses) if err != nil { + // This is an error from processing response body, not an error from sending events. No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to decode msgpack batch response") } } else { bodyBytes, err := io.ReadAll(resp.Body) if err != nil { + // This is an error from processing response body, not an error from sending events. No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to read response body") } else { err = json.Unmarshal(bodyBytes, &batchResponses) if err != nil { + // This is an error from processing response body, not an error from sending events. 
No need to include event information here d.Logger.Error().WithField("err", err.Error()).Logf("failed to decode JSON batch response") } } @@ -569,12 +580,12 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { // Check if we have a response for this event if i >= len(batchResponses) { // Missing response - treat as server error - d.handleEventError(ev, http.StatusInternalServerError, queueTime, "insufficient responses from server", nil) + d.handleEventError(ev, http.StatusInternalServerError, queueTime, "insufficient responses from server", nil, "insufficient responses from server") continue } if batchResponses[i].Status != http.StatusAccepted { - d.handleEventError(ev, batchResponses[i].Status, queueTime, "", nil) + d.handleEventError(ev, batchResponses[i].Status, queueTime, "", nil, "") } else { // Success d.Metrics.Increment(d.metricKeys.counterResponse20x) @@ -610,7 +621,7 @@ func (d *DirectTransmission) sendBatch(wholeBatch []*types.Event) { for _, ev := range subBatch { queueTime := dequeuedAt.UnixMicro() - ev.EnqueuedUnixMicro - d.handleEventError(ev, resp.StatusCode, queueTime, "", bodyBytes) + d.handleEventError(ev, resp.StatusCode, queueTime, "", bodyBytes, "") } } } diff --git a/transmit/direct_transmit_test.go b/transmit/direct_transmit_test.go index 3cbe9aa9c7..4013d356b6 100644 --- a/transmit/direct_transmit_test.go +++ b/transmit/direct_transmit_test.go @@ -239,6 +239,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer errorServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + // Send 4 events to ensure we get 2 successes and 2 errors sendTestEvents(dt, errorServer.URL, 4, "test-api-key") err := dt.Stop() @@ -267,6 +273,9 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) assert.Equal(t, "test", 
errorEvent.Fields["environment"]) assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") } }) @@ -280,6 +289,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer errorServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, errorServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -303,6 +318,9 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) assert.Equal(t, http.StatusInternalServerError, errorEvent.Fields["status_code"]) assert.Contains(t, errorEvent.Fields, "response_body") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") } }) @@ -365,7 +383,13 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { })) defer msgpackServer.Close() - dt, mockMetrics, _ := setupDirectTransmissionTest(t) + dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, msgpackServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -380,6 +404,21 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), errors) assert.Equal(t, float64(1), batchesSent) // Single batch containing 2 events assert.Equal(t, float64(2), messagesSent) + + // Verify error log has all expected fields + errorEvents := getErrorEvents(mockLogger) + require.Len(t, errorEvents, 1, "Expected one error log for rejected event") + + errorEvent := errorEvents[0] + assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) + assert.Equal(t, http.StatusBadRequest, errorEvent.Fields["status_code"]) + 
assert.Equal(t, msgpackServer.URL, errorEvent.Fields["api_host"]) + assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) + assert.Equal(t, "test", errorEvent.Fields["environment"]) + assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") }) t.Run("insufficient responses from server", func(t *testing.T) { @@ -393,6 +432,12 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { defer insufficientServer.Close() dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, insufficientServer.URL, 2, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -408,14 +453,20 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), batchesSent) // Single batch containing 2 events assert.Equal(t, float64(2), messagesSent) - // Verify error log message mentions insufficient responses + // Verify error log has all expected fields errorEvents := getErrorEvents(mockLogger) require.Len(t, errorEvents, 1, "Expected exactly one error log for the missing response") errorEvent := errorEvents[0] - assert.Equal(t, "error when sending event", errorEvent.Fields["error"]) + assert.Equal(t, "insufficient responses from server", errorEvent.Fields["error"]) assert.Equal(t, http.StatusInternalServerError, errorEvent.Fields["status_code"]) + assert.Equal(t, insufficientServer.URL, errorEvent.Fields["api_host"]) + assert.Equal(t, "test-dataset", errorEvent.Fields["dataset"]) + assert.Equal(t, "test", errorEvent.Fields["environment"]) assert.Contains(t, errorEvent.Fields, "roundtrip_usec") + + // Verify AdditionalErrorFields + assert.Contains(t, errorEvent.Fields, "event_id") }) t.Run("response decode errors", func(t *testing.T) { @@ -427,7 +478,13 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { })) defer 
decodeErrorServer.Close() - dt, mockMetrics, _ := setupDirectTransmissionTest(t) + dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + + // Configure AdditionalErrorFields + dt.Config = &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + sendTestEvents(dt, decodeErrorServer.URL, 1, "test-api-key") err := dt.Stop() require.NoError(t, err) @@ -440,6 +497,16 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(1), decodeErrors) assert.Equal(t, float64(1), batchesSent) assert.Equal(t, float64(1), messagesSent) + + // Verify decode error log has context fields + var foundErrorLog bool + for _, event := range mockLogger.Events { + if msg, ok := event.Fields["error"].(string); ok && strings.Contains(msg, "failed to decode msgpack batch response") { + foundErrorLog = true + break + } + } + require.True(t, foundErrorLog, "Expected decode error log") }) t.Run("event over 1M size", func(t *testing.T) { @@ -453,8 +520,14 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { dt, mockMetrics, mockLogger := setupDirectTransmissionTest(t) + // Configure AdditionalErrorFields + mockCfg := &config.MockConfig{ + AdditionalErrorFields: []string{"event_id"}, + } + dt.Config = mockCfg + // Create an event with data over 1M - eventData := types.NewPayload(&config.MockConfig{}, map[string]any{ + eventData := types.NewPayload(mockCfg, map[string]any{ "large_field": strings.Repeat("a", 1024*1024+1000), "event_id": 1, }) @@ -480,15 +553,21 @@ func TestDirectTransmissionErrorHandling(t *testing.T) { assert.Equal(t, float64(0), success) assert.Equal(t, float64(1), errors) - // Verify error log message about oversized event - var oversizedFound bool + // Verify error log has all expected fields + var oversizedLog *logger.MockLoggerEvent for _, event := range mockLogger.Events { - if errorMsg, ok := event.Fields["err"].(string); ok && strings.Contains(errorMsg, "exceeds max event size") { - oversizedFound = true + if msg, ok 
:= event.Fields["error"].(string); ok && strings.Contains(msg, "failed to marshal event") { + oversizedLog = event break } } - require.True(t, oversizedFound, "Expected error log for oversized event") + require.NotNil(t, oversizedLog, "Expected error log for oversized event") + + assert.Equal(t, server.URL, oversizedLog.Fields["api_host"]) + assert.Equal(t, "test-dataset", oversizedLog.Fields["dataset"]) + assert.Equal(t, "test", oversizedLog.Fields["environment"]) + assert.Contains(t, oversizedLog.Fields, "roundtrip_usec") + assert.Contains(t, oversizedLog.Fields, "error") }) } @@ -728,7 +807,7 @@ func TestDirectTransmission(t *testing.T) { // Verify all events were queued and dequeued, net = 0 assert.Equal(t, float64(0), queuedItems) // Verify queue time histogram was updated for all events - assert.Equal(t, expectedEvents, mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) + assert.Equal(t, len(allEvents), mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) // Verify batch and message counts // Dataset A: 5 events -> 2 batches (3+2) @@ -815,7 +894,7 @@ func TestDirectTransmissionBatchSizeLimit(t *testing.T) { assert.Equal(t, float64(expectedEvents), success) assert.Equal(t, float64(len(allEvents)-expectedEvents), errors) assert.Equal(t, float64(0), queuedItems) - assert.Equal(t, expectedEvents, mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) + assert.Equal(t, len(allEvents), mockMetrics.GetHistogramCount(dt.metricKeys.histogramQueueTime)) // Verify batch and message counts - events are large so batches will be smaller assert.Greater(t, batchesSent, float64(0), "Should have sent at least one batch") @@ -1129,6 +1208,9 @@ func TestDirectTransmissionRetryLogic(t *testing.T) { if tt.expectSuccess { assert.Contains(t, mockMetrics.CounterIncrements, "libhoney_upstream_response_20x") + } else if tt.statusCode == 0 { + // Network/timeout error: whole batch failed before any response + assert.Contains(t, 
mockMetrics.CounterIncrements, "libhoney_upstream_send_errors") } else { assert.Contains(t, mockMetrics.CounterIncrements, "libhoney_upstream_response_errors") }