Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 7 additions & 35 deletions cmd/thv-operator/controllers/mcpremoteproxy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ type MCPRemoteProxyReconciler struct {
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpexternalauthconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptelemetryconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs/status,verbs=get;update;patch
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch
// +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch
// +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=create;delete;get;list;patch;update;watch
Expand Down Expand Up @@ -920,11 +919,13 @@ func (r *MCPRemoteProxyReconciler) handleOIDCConfig(ctx context.Context, proxy *
return err
}

// Update ReferencingWorkloads on the MCPOIDCConfig status
if err := r.updateOIDCConfigReferencingWorkloads(ctx, oidcConfig, proxy.Name); err != nil {
ctxLogger.Error(err, "Failed to update MCPOIDCConfig ReferencingWorkloads")
// Non-fatal: continue with reconciliation
}
// ReferencingWorkloads on the MCPOIDCConfig is maintained solely by the
// MCPOIDCConfig controller, which watches MCPServer/VirtualMCPServer/
// MCPRemoteProxy and recomputes the full list (additions and removals). The
// MCPRemoteProxy controller must not write the config's status: a full
// r.Status().Update here would clobber conditions the config controller
// owns, and the previous append-only write never removed stale entries.
// See #5511.

// Detect whether the condition is transitioning to True (e.g. recovering from
// a transient error). Without this check the status update is skipped when the
Expand Down Expand Up @@ -1017,35 +1018,6 @@ func (r *MCPRemoteProxyReconciler) fetchAndValidateOIDCConfig(
return oidcConfig, nil
}

// updateOIDCConfigReferencingWorkloads ensures the MCPRemoteProxy is listed in
// the MCPOIDCConfig's ReferencingWorkloads status field.
func (r *MCPRemoteProxyReconciler) updateOIDCConfigReferencingWorkloads(
ctx context.Context,
oidcConfig *mcpv1beta1.MCPOIDCConfig,
proxyName string,
) error {
ref := mcpv1beta1.WorkloadReference{
Kind: mcpv1beta1.WorkloadKindMCPRemoteProxy,
Name: proxyName,
}

// Check if already listed
for _, entry := range oidcConfig.Status.ReferencingWorkloads {
if entry.Kind == ref.Kind && entry.Name == ref.Name {
return nil
}
}

// Add the workload reference
oidcConfig.Status.ReferencingWorkloads = append(oidcConfig.Status.ReferencingWorkloads, ref)
oidcConfig.Status.ReferenceCount = workloadReferenceCount(oidcConfig.Status.ReferencingWorkloads)
if err := r.Status().Update(ctx, oidcConfig); err != nil {
return fmt.Errorf("failed to update MCPOIDCConfig ReferencingWorkloads: %w", err)
}

return nil
}

// validateGroupRef validates the GroupRef field of the MCPRemoteProxy.
// This function only sets conditions on the proxy object - the caller is responsible
// for persisting the status update to avoid multiple conflicting status updates.
Expand Down
61 changes: 0 additions & 61 deletions cmd/thv-operator/controllers/mcpremoteproxy_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1439,64 +1439,3 @@ func TestGetExternalAuthConfigForMCPRemoteProxy(t *testing.T) {
assert.NotNil(t, result)
assert.Equal(t, "test-auth", result.Name)
}

func TestMCPRemoteProxyReconciler_updateOIDCConfigReferencingWorkloads(t *testing.T) {
t.Parallel()

existingRef := mcpv1beta1.WorkloadReference{
Kind: mcpv1beta1.WorkloadKindMCPRemoteProxy,
Name: "existing",
}
newRef := mcpv1beta1.WorkloadReference{
Kind: mcpv1beta1.WorkloadKindMCPRemoteProxy,
Name: "new",
}

tests := []struct {
name string
proxyName string
expectedRefs []mcpv1beta1.WorkloadReference
expectedCount int32
}{
{
name: "adds new proxy reference",
proxyName: "new",
expectedRefs: []mcpv1beta1.WorkloadReference{existingRef, newRef},
expectedCount: 2,
},
{
name: "does not duplicate existing reference",
proxyName: "existing",
expectedRefs: []mcpv1beta1.WorkloadReference{existingRef},
expectedCount: 1,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

ctx := t.Context()
scheme := runtime.NewScheme()
require.NoError(t, mcpv1beta1.AddToScheme(scheme))

oidcConfig := &mcpv1beta1.MCPOIDCConfig{
ObjectMeta: metav1.ObjectMeta{Name: "cfg", Namespace: "default"},
Status: mcpv1beta1.MCPOIDCConfigStatus{
ReferencingWorkloads: []mcpv1beta1.WorkloadReference{existingRef},
ReferenceCount: 1,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(oidcConfig).
WithStatusSubresource(&mcpv1beta1.MCPOIDCConfig{}).
Build()
reconciler := &MCPRemoteProxyReconciler{Client: fakeClient, Scheme: scheme}

require.NoError(t, reconciler.updateOIDCConfigReferencingWorkloads(ctx, oidcConfig, tt.proxyName))
assert.ElementsMatch(t, tt.expectedRefs, oidcConfig.Status.ReferencingWorkloads)
assert.Equal(t, tt.expectedCount, oidcConfig.Status.ReferenceCount)
})
}
}
42 changes: 7 additions & 35 deletions cmd/thv-operator/controllers/mcpserver_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ func (r *MCPServerReconciler) detectPlatform(ctx context.Context) (kubernetes.Pl
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpservers/finalizers,verbs=update
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptoolconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptelemetryconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch
// +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch
Expand Down Expand Up @@ -2276,11 +2275,13 @@ func (r *MCPServerReconciler) handleOIDCConfig(ctx context.Context, m *mcpv1beta
return err
}

// Update ReferencingWorkloads on the MCPOIDCConfig status
if err := r.updateOIDCConfigReferencingWorkloads(ctx, oidcConfig, m.Name); err != nil {
ctxLogger.Error(err, "Failed to update MCPOIDCConfig ReferencingWorkloads")
// Non-fatal: continue with reconciliation
}
// ReferencingWorkloads on the MCPOIDCConfig is maintained solely by the
// MCPOIDCConfig controller, which watches MCPServer/VirtualMCPServer/
// MCPRemoteProxy and recomputes the full list (additions and removals). The
// MCPServer controller must not write the config's status: a full
// r.Status().Update here would clobber conditions the config controller
// owns, and the previous append-only write never removed stale entries.
// See #5511.

// Detect whether the condition is transitioning to True (e.g. recovering from
// a transient error). Without this check the status update is skipped when the
Expand Down Expand Up @@ -2367,35 +2368,6 @@ func setOIDCConfigRefCondition(m *mcpv1beta1.MCPServer, status metav1.ConditionS
})
}

// updateOIDCConfigReferencingWorkloads ensures the MCPServer is listed in
// the MCPOIDCConfig's ReferencingWorkloads status field.
func (r *MCPServerReconciler) updateOIDCConfigReferencingWorkloads(
ctx context.Context,
oidcConfig *mcpv1beta1.MCPOIDCConfig,
serverName string,
) error {
ref := mcpv1beta1.WorkloadReference{
Kind: mcpv1beta1.WorkloadKindMCPServer,
Name: serverName,
}

// Check if already listed
for _, entry := range oidcConfig.Status.ReferencingWorkloads {
if entry.Kind == ref.Kind && entry.Name == ref.Name {
return nil
}
}

// Add the workload reference
oidcConfig.Status.ReferencingWorkloads = append(oidcConfig.Status.ReferencingWorkloads, ref)
oidcConfig.Status.ReferenceCount = workloadReferenceCount(oidcConfig.Status.ReferencingWorkloads)
if err := r.Status().Update(ctx, oidcConfig); err != nil {
return fmt.Errorf("failed to update MCPOIDCConfig ReferencingWorkloads: %w", err)
}

return nil
}

// handleWebhookConfig validates and tracks the hash of the referenced MCPWebhookConfig.
func (r *MCPServerReconciler) handleWebhookConfig(ctx context.Context, m *mcpv1beta1.MCPServer) error {
ctxLogger := log.FromContext(ctx)
Expand Down
163 changes: 87 additions & 76 deletions cmd/thv-operator/controllers/mcpserver_oidcconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,15 @@ func TestMCPServerReconciler_handleOIDCConfig(t *testing.T) {
}}

tests := []struct {
name string
mcpServer *mcpv1beta1.MCPServer
oidcConfig *mcpv1beta1.MCPOIDCConfig
expectError bool
expectErrorContains string
expectHash string
expectHashCleared bool
expectConditionStatus *metav1.ConditionStatus
expectConditionReason string
expectReferencingServer bool
name string
mcpServer *mcpv1beta1.MCPServer
oidcConfig *mcpv1beta1.MCPOIDCConfig
expectError bool
expectErrorContains string
expectHash string
expectHashCleared bool
expectConditionStatus *metav1.ConditionStatus
expectConditionReason string
}{
{
name: "no ref clears previously stored hash",
Expand Down Expand Up @@ -109,10 +108,9 @@ func TestMCPServerReconciler_handleOIDCConfig(t *testing.T) {
Conditions: validOIDCCondition,
},
},
expectHash: "hash-123",
expectConditionStatus: conditionStatusPtr(metav1.ConditionTrue),
expectConditionReason: mcpv1beta1.ConditionReasonOIDCConfigRefValid,
expectReferencingServer: true,
expectHash: "hash-123",
expectConditionStatus: conditionStatusPtr(metav1.ConditionTrue),
expectConditionReason: mcpv1beta1.ConditionReasonOIDCConfigRefValid,
},
{
name: "detects config hash change",
Expand All @@ -137,10 +135,9 @@ func TestMCPServerReconciler_handleOIDCConfig(t *testing.T) {
Conditions: validOIDCCondition,
},
},
expectHash: "new-hash",
expectConditionStatus: conditionStatusPtr(metav1.ConditionTrue),
expectConditionReason: mcpv1beta1.ConditionReasonOIDCConfigRefValid,
expectReferencingServer: true,
expectHash: "new-hash",
expectConditionStatus: conditionStatusPtr(metav1.ConditionTrue),
expectConditionReason: mcpv1beta1.ConditionReasonOIDCConfigRefValid,
},
}

Expand Down Expand Up @@ -200,68 +197,10 @@ func TestMCPServerReconciler_handleOIDCConfig(t *testing.T) {
}
assert.True(t, found, "expected %s condition", mcpv1beta1.ConditionOIDCConfigRefValidated)
}

if tt.expectReferencingServer && tt.oidcConfig != nil {
var updated mcpv1beta1.MCPOIDCConfig
require.NoError(t, fakeClient.Get(ctx, client.ObjectKeyFromObject(tt.oidcConfig), &updated))
expectedRef := mcpv1beta1.WorkloadReference{Kind: "MCPServer", Name: tt.mcpServer.Name}
assert.Contains(t, updated.Status.ReferencingWorkloads, expectedRef)
}
})
}
}

func TestMCPServerReconciler_updateOIDCConfigReferencingWorkloads(t *testing.T) {
t.Parallel()

existingRef := mcpv1beta1.WorkloadReference{Kind: "MCPServer", Name: "existing"}

t.Run("adds new server reference", func(t *testing.T) {
t.Parallel()
ctx := t.Context()
scheme := runtime.NewScheme()
require.NoError(t, mcpv1beta1.AddToScheme(scheme))

cfg := &mcpv1beta1.MCPOIDCConfig{
ObjectMeta: metav1.ObjectMeta{Name: "cfg", Namespace: "default"},
Status: mcpv1beta1.MCPOIDCConfigStatus{
ReferencingWorkloads: []mcpv1beta1.WorkloadReference{existingRef},
ReferenceCount: 1,
},
}
fc := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cfg).
WithStatusSubresource(&mcpv1beta1.MCPOIDCConfig{}).Build()
r := newTestMCPServerReconciler(fc, scheme, kubernetes.PlatformKubernetes)

require.NoError(t, r.updateOIDCConfigReferencingWorkloads(ctx, cfg, "new"))
newRef := mcpv1beta1.WorkloadReference{Kind: "MCPServer", Name: "new"}
assert.ElementsMatch(t, []mcpv1beta1.WorkloadReference{existingRef, newRef}, cfg.Status.ReferencingWorkloads)
assert.EqualValues(t, 2, cfg.Status.ReferenceCount)
})

t.Run("does not duplicate existing reference", func(t *testing.T) {
t.Parallel()
ctx := t.Context()
scheme := runtime.NewScheme()
require.NoError(t, mcpv1beta1.AddToScheme(scheme))

cfg := &mcpv1beta1.MCPOIDCConfig{
ObjectMeta: metav1.ObjectMeta{Name: "cfg", Namespace: "default"},
Status: mcpv1beta1.MCPOIDCConfigStatus{
ReferencingWorkloads: []mcpv1beta1.WorkloadReference{existingRef},
ReferenceCount: 1,
},
}
fc := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cfg).
WithStatusSubresource(&mcpv1beta1.MCPOIDCConfig{}).Build()
r := newTestMCPServerReconciler(fc, scheme, kubernetes.PlatformKubernetes)

require.NoError(t, r.updateOIDCConfigReferencingWorkloads(ctx, cfg, "existing"))
assert.Len(t, cfg.Status.ReferencingWorkloads, 1)
assert.EqualValues(t, 1, cfg.Status.ReferenceCount)
})
}

// TestMCPServerReconciler_handleOIDCConfig_ConditionPersistedOnRecovery verifies that the
// OIDCConfigRefValidated condition is actually persisted to the API server (not just set
// in memory) when recovering from a transient error with an unchanged config hash (#4511).
Expand Down Expand Up @@ -458,3 +397,75 @@ func TestMCPOIDCConfigReconciler_handleDeletion_IgnoresCrossNamespaceRef(t *test
func conditionStatusPtr(s metav1.ConditionStatus) *metav1.ConditionStatus {
return &s
}

// TestMCPServerReconciler_handleOIDCConfig_DoesNotWriteConfigStatus guards the
// trust boundary consolidated in #5511: the MCPServer controller may read the
// referenced MCPOIDCConfig but must never write its status. The MCPOIDCConfig
// controller is the sole owner of that status (conditions and
// ReferencingWorkloads). This unit test is the actual enforcement of that
// boundary — RBAC does not enforce it, because the operator runs as a single
// ServiceAccount whose aggregated role still grants mcpoidcconfigs/status write
// for the MCPOIDCConfig controller's own use. A future reintroduction of a
// cross-controller status write would flip the ResourceVersion and fail here.
func TestMCPServerReconciler_handleOIDCConfig_DoesNotWriteConfigStatus(t *testing.T) {
t.Parallel()
ctx := t.Context()

scheme := runtime.NewScheme()
require.NoError(t, mcpv1beta1.AddToScheme(scheme))
require.NoError(t, corev1.AddToScheme(scheme))

// Seed a config whose status carries a condition and a ReferencingWorkloads
// entry owned by the MCPOIDCConfig controller — neither must be touched.
oidcConfig := &mcpv1beta1.MCPOIDCConfig{
ObjectMeta: metav1.ObjectMeta{Name: "cfg", Namespace: "default"},
Spec: mcpv1beta1.MCPOIDCConfigSpec{
Type: mcpv1beta1.MCPOIDCConfigTypeInline,
Inline: &mcpv1beta1.InlineOIDCSharedConfig{Issuer: "https://x", ClientID: "c"},
},
Status: mcpv1beta1.MCPOIDCConfigStatus{
ConfigHash: "hash-123",
Conditions: []metav1.Condition{
{
Type: mcpv1beta1.ConditionTypeOIDCConfigValid, Status: metav1.ConditionTrue,
Reason: mcpv1beta1.ConditionReasonOIDCConfigValid,
},
{
Type: "ForeignControllerSays", Status: metav1.ConditionTrue,
Reason: "ExternallySet", LastTransitionTime: metav1.Now(),
},
},
ReferencingWorkloads: []mcpv1beta1.WorkloadReference{{Kind: "MCPServer", Name: "someone-else"}},
ReferenceCount: 1,
},
}
mcpServer := &mcpv1beta1.MCPServer{
ObjectMeta: metav1.ObjectMeta{Name: "s", Namespace: "default"},
Spec: mcpv1beta1.MCPServerSpec{
Image: "img",
OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{Name: "cfg", Audience: "aud"},
},
}

fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(oidcConfig, mcpServer).
WithStatusSubresource(&mcpv1beta1.MCPServer{}, &mcpv1beta1.MCPOIDCConfig{}).
Build()
r := newTestMCPServerReconciler(fakeClient, scheme, kubernetes.PlatformKubernetes)

var before mcpv1beta1.MCPOIDCConfig
require.NoError(t, fakeClient.Get(ctx, client.ObjectKeyFromObject(oidcConfig), &before))

require.NoError(t, r.handleOIDCConfig(ctx, mcpServer))

var after mcpv1beta1.MCPOIDCConfig
require.NoError(t, fakeClient.Get(ctx, client.ObjectKeyFromObject(oidcConfig), &after))

assert.Equal(t, before.ResourceVersion, after.ResourceVersion,
"MCPServer reconcile must not write the MCPOIDCConfig — its status is owned by the MCPOIDCConfig controller")
assert.Equal(t, before.Status.ReferencingWorkloads, after.Status.ReferencingWorkloads,
"MCPServer reconcile must not touch the config's ReferencingWorkloads")
assert.NotNil(t, meta.FindStatusCondition(after.Status.Conditions, "ForeignControllerSays"),
"config-owned conditions must remain untouched")
}
Loading
Loading