From 34b0223a265632487664d46e796c567dc5f2c364 Mon Sep 17 00:00:00 2001 From: Trey Date: Tue, 16 Jun 2026 12:38:09 -0700 Subject: [PATCH 1/4] Wire the tool optimizer onto the Serve path Implements changes for issue #5538: - Build a per-session optimizer over core.ListTools on the Serve path and advertise only find_tool/call_tool (sourced from the core) - Route call_tool's inner invocation through core.CallTool by its real name, closing the deferred inner-target admission gap - Skip the session factory's optimizer decorator on the Serve path (FactoryConfig.AdvertiseFromCore) to avoid double-indexing the store - Surface the resolved optimizer factory via Manager.OptimizerFactory; store/cleanup ownership stays in sessionmanager Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/vmcp/server/serve.go | 17 +- pkg/vmcp/server/serve_handlers.go | 4 +- pkg/vmcp/server/serve_optimizer.go | 197 ++++++++++ pkg/vmcp/server/serve_optimizer_test.go | 363 ++++++++++++++++++ pkg/vmcp/server/server.go | 17 +- pkg/vmcp/server/sessionmanager/factory.go | 17 +- .../server/sessionmanager/session_manager.go | 25 +- pkg/vmcp/session/optimizerdec/decorator.go | 60 +-- 8 files changed, 665 insertions(+), 35 deletions(-) create mode 100644 pkg/vmcp/server/serve_optimizer.go create mode 100644 pkg/vmcp/server/serve_optimizer_test.go diff --git a/pkg/vmcp/server/serve.go b/pkg/vmcp/server/serve.go index 62fba5b375..5d624f19b8 100644 --- a/pkg/vmcp/server/serve.go +++ b/pkg/vmcp/server/serve.go @@ -138,6 +138,12 @@ type ServerConfig struct { // table this path discards — exactly the double-aggregation AC2 forbids. This is an // unenforced contract today because no production composition root wires the Serve path // yet; it becomes load-bearing when one does. + // + // Caller responsibility (optimizer): to enable the optimizer on the Serve path, set + // FactoryConfig.OptimizerConfig/OptimizerFactory AND FactoryConfig.AdvertiseFromCore. + // Serve then builds a per-session optimizer over the core's tools (serve_optimizer.go); + // AdvertiseFromCore suppresses the factory's own optimizer decorator so the shared FTS5 + // store is not double-indexed (see FactoryConfig.AdvertiseFromCore). SessionManagerConfig *sessionmanager.FactoryConfig // TelemetryProvider is the cross-cutting telemetry provider (also consumed by @@ -265,9 +271,14 @@ func Serve(ctx context.Context, v core.VMCP, cfg *ServerConfig) (*Server, error) sessionManager: sessionManager, sessionDataStorage: sessionDataStorage, vmcpSessionMgr: vmcpSessMgr, - ready: make(chan struct{}), - healthMonitor: cfg.HealthMonitor, - statusReporter: cfg.StatusReporter, + // Surface the resolved (telemetry-wrapped) optimizer factory so Serve-path + // session registration builds a per-session optimizer over the core's tools. + // Nil when the optimizer is disabled; the store/cleanup stay owned by the + // session manager (optimizerCleanup, appended to shutdownFuncs below). + optimizerFactory: vmcpSessMgr.OptimizerFactory(), + ready: make(chan struct{}), + healthMonitor: cfg.HealthMonitor, + statusReporter: cfg.StatusReporter, } if optimizerCleanup != nil { diff --git a/pkg/vmcp/server/serve_handlers.go b/pkg/vmcp/server/serve_handlers.go index 3c78844313..ad5bad3949 100644 --- a/pkg/vmcp/server/serve_handlers.go +++ b/pkg/vmcp/server/serve_handlers.go @@ -57,7 +57,9 @@ func (s *Server) injectCoreSessionCapabilities(ctx context.Context, session serv // and passed explicitly to the core — the core never reads it from context. identity, _ := auth.IdentityFromContext(ctx) - tools, err := s.coreSessionTools(ctx, sessionID, identity) + // serveSessionTools returns the core's advertised tools, or — when the optimizer + // is enabled on this path — the find_tool/call_tool meta-tools built over them. + tools, err := s.serveSessionTools(ctx, sessionID, identity) if err != nil { slog.Error("failed to list core tools for session", "session_id", sessionID, "error", err) return err diff --git a/pkg/vmcp/server/serve_optimizer.go b/pkg/vmcp/server/serve_optimizer.go new file mode 100644 index 0000000000..07e9d386a6 --- /dev/null +++ b/pkg/vmcp/server/serve_optimizer.go @@ -0,0 +1,197 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package server + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + + "github.com/stacklok/toolhive/pkg/auth" + "github.com/stacklok/toolhive/pkg/vmcp" + "github.com/stacklok/toolhive/pkg/vmcp/optimizer" + "github.com/stacklok/toolhive/pkg/vmcp/schema" + "github.com/stacklok/toolhive/pkg/vmcp/session/optimizerdec" +) + +// This file holds the Serve-path optimizer wiring. Unlike the legacy server.New +// path — where the optimizer is a session-factory decorator that indexes the +// factory's aggregated tools and replaces MultiSession.Tools() with +// find_tool/call_tool — the Serve path keeps the optimizer a Serve-layer, +// session-scoped index but sources its tool set from the core: +// +// - The advertised set is built from core.ListTools (admission-filtered, +// aggregated, composites included) via coreSessionTools, whose handlers route +// through core.CallTool. +// - call_tool's inner invocation dispatches to that core handler, so the core +// admission seam authorizes the inner target by its real name (closing the +// deferred optimizer-admission gap documented in core/admission.go). +// +// The optimizer is NOT placed in the stateless core: it upserts a session's tools +// into a shared FTS5 store, which is transport/session state. To avoid indexing a +// second, divergent set, the session factory's optimizer decorator is skipped on +// this path (FactoryConfig.AdvertiseFromCore); the resolved factory is consumed +// directly here via s.optimizerFactory. + +// serveSessionTools returns the SDK tools to advertise for a Serve-path session: +// the core's advertised set, or — when the optimizer is enabled — the find_tool / +// call_tool meta-tools built over that set. Both session registration +// (injectCoreSessionCapabilities) and cross-pod re-injection (lazyInjectSessionTools) +// call it, so the two paths advertise an identical set for the same identity. +func (s *Server) serveSessionTools( + ctx context.Context, sessionID string, identity *auth.Identity, +) ([]server.ServerTool, error) { + coreTools, err := s.coreSessionTools(ctx, sessionID, identity) + if err != nil { + return nil, err + } + if s.optimizerFactory == nil { + return coreTools, nil + } + return s.optimizerSessionTools(ctx, sessionID, coreTools) +} + +// optimizerSessionTools builds a per-session optimizer over coreTools (the core's +// advertised set, whose handlers route through core.CallTool) and returns exactly +// the find_tool and call_tool meta-tools. find_tool searches this session's core +// tools; call_tool dispatches the named inner tool through its core handler so the +// inner target is admission-checked by the core. Building the optimizer upserts +// coreTools into the shared store; the returned optimizer is telemetry-wrapped, so +// find_tool/call_tool metrics and traces fire on this path as on the legacy one. +func (s *Server) optimizerSessionTools( + ctx context.Context, sessionID string, coreTools []server.ServerTool, +) ([]server.ServerTool, error) { + opt, err := s.optimizerFactory(ctx, coreTools) + if err != nil { + return nil, fmt.Errorf("build session optimizer: %w", err) + } + + defs := optimizerdec.OptimizerTools() + sdkTools := make([]server.ServerTool, 0, len(defs)) + for _, def := range defs { + schemaJSON, marshalErr := json.Marshal(def.InputSchema) + if marshalErr != nil { + return nil, fmt.Errorf("marshal schema for %s: %w", def.Name, marshalErr) + } + handler, handlerErr := s.optimizerToolHandler(sessionID, def.Name, opt) + if handlerErr != nil { + return nil, handlerErr + } + sdkTools = append(sdkTools, server.ServerTool{ + Tool: mcp.Tool{ + Name: def.Name, + Description: def.Description, + RawInputSchema: schemaJSON, + }, + Handler: handler, + }) + } + + slog.Debug("session optimizer built over core tools", + "session_id", sessionID, "indexed_tool_count", len(coreTools)) + return sdkTools, nil +} + +// optimizerToolHandler returns the SDK handler for a Serve-path optimizer meta-tool. +// It is total over the two names OptimizerTools advertises; any other name is a +// programming error (a definition without a wired handler) and fails registration. +func (s *Server) optimizerToolHandler( + sessionID, toolName string, opt optimizer.Optimizer, +) (server.ToolHandlerFunc, error) { + switch toolName { + case optimizerdec.FindToolName: + return s.optimizerFindToolHandler(sessionID, opt), nil + case optimizerdec.CallToolName: + return s.optimizerCallToolHandler(sessionID, opt), nil + default: + return nil, fmt.Errorf("unknown optimizer meta-tool %q", toolName) + } +} + +// optimizerFindToolHandler builds the find_tool SDK handler. It enforces the +// session's identity binding (anti-hijack) before searching, then returns the +// optimizer's FindToolOutput marshalled as both text and structured content, +// mirroring the legacy optimizerdec handler. +func (s *Server) optimizerFindToolHandler(sessionID string, opt optimizer.Optimizer) server.ToolHandlerFunc { + return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { + caller, _ := auth.IdentityFromContext(ctx) + if err := s.enforceSessionBinding(sessionID, caller); err != nil { + s.terminateOnBindingFailure(sessionID, optimizerdec.FindToolName, err) + return mcp.NewToolResultError(fmt.Sprintf("Unauthorized: %v", err)), nil + } + + args, ok := req.Params.Arguments.(map[string]any) + if !ok { + return mcp.NewToolResultError( + fmt.Sprintf("%v: arguments must be object, got %T", vmcp.ErrInvalidInput, req.Params.Arguments)), nil + } + + input, err := schema.Translate[optimizer.FindToolInput](args) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("invalid arguments: %v", err)), nil + } + + output, err := opt.FindTool(ctx, input) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("find_tool failed: %v", err)), nil + } + + jsonBytes, err := json.Marshal(output) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("failed to marshal find_tool output: %v", err)), nil + } + + // Unmarshal cannot fail: jsonBytes was just produced by json.Marshal above. + var structured map[string]any + _ = json.Unmarshal(jsonBytes, &structured) + + result := mcp.NewToolResultText(string(jsonBytes)) + result.StructuredContent = structured + return result, nil + } +} + +// optimizerCallToolHandler builds the call_tool SDK handler. It enforces the +// session's identity binding, then delegates to opt.CallTool, which dispatches to +// the inner tool's coreToolHandler — routing through core.CallTool with the real +// inner tool name. The core admission seam authorizes the inner target there; a +// denial surfaces as coreToolHandler's generic "call denied by authorization +// policy" result (the optimizer returns it verbatim), so no authorizer detail +// leaks. The MCP result from the optimizer is returned as-is. +func (s *Server) optimizerCallToolHandler(sessionID string, opt optimizer.Optimizer) server.ToolHandlerFunc { + return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { + caller, _ := auth.IdentityFromContext(ctx) + if err := s.enforceSessionBinding(sessionID, caller); err != nil { + s.terminateOnBindingFailure(sessionID, optimizerdec.CallToolName, err) + return mcp.NewToolResultError(fmt.Sprintf("Unauthorized: %v", err)), nil + } + + args, ok := req.Params.Arguments.(map[string]any) + if !ok { + return mcp.NewToolResultError( + fmt.Sprintf("%v: arguments must be object, got %T", vmcp.ErrInvalidInput, req.Params.Arguments)), nil + } + + input, err := schema.Translate[optimizer.CallToolInput](args) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("invalid arguments: %v", err)), nil + } + + result, err := opt.CallTool(ctx, input) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf("call_tool failed: %v", err)), nil + } + // Defensive parity with the legacy optimizerdec handler: the production + // optimizer never returns (nil, nil), but guard so a future implementation + // cannot hand a bare nil result to the SDK. + if result == nil { + return mcp.NewToolResultError("call_tool: optimizer returned nil result"), nil + } + return result, nil + } +} diff --git a/pkg/vmcp/server/serve_optimizer_test.go b/pkg/vmcp/server/serve_optimizer_test.go new file mode 100644 index 0000000000..5225d54492 --- /dev/null +++ b/pkg/vmcp/server/serve_optimizer_test.go @@ -0,0 +1,363 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package server + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + + "github.com/stacklok/toolhive/pkg/auth" + "github.com/stacklok/toolhive/pkg/vmcp" + "github.com/stacklok/toolhive/pkg/vmcp/optimizer" + "github.com/stacklok/toolhive/pkg/vmcp/server/sessionmanager" + "github.com/stacklok/toolhive/pkg/vmcp/session/optimizerdec" +) + +// These tests cover the Serve-path optimizer wiring (#5538): when the optimizer is +// enabled on the Serve path, tools/list advertises only find_tool/call_tool sourced +// from core.ListTools; call_tool dispatches the inner tool through core.CallTool with +// its real name (closing the deferred inner-target admission gap); identity binding is +// enforced on both meta-tools; and cross-pod re-injection re-advertises the pair. The +// legacy server.New optimizer path is unchanged and covered by +// TestIntegration_SessionManagement_OptimizerMode (s.core == nil). + +// dispatchOptimizer is a test optimizer.Optimizer that mirrors the real optimizer's +// dispatch without its SQLite/embedding store: FindTool returns the tools it was built +// over (so find_tool's result is observable), and CallTool looks the inner tool up by +// name and invokes its handler — exercising the coreToolHandler → core.CallTool path +// that closes the inner-target admission gap. +type dispatchOptimizer struct { + tools map[string]server.ServerTool + defs []mcp.Tool +} + +var _ optimizer.Optimizer = (*dispatchOptimizer)(nil) + +func (o *dispatchOptimizer) FindTool(_ context.Context, _ optimizer.FindToolInput) (*optimizer.FindToolOutput, error) { + return &optimizer.FindToolOutput{Tools: o.defs}, nil +} + +func (o *dispatchOptimizer) CallTool(ctx context.Context, input optimizer.CallToolInput) (*mcp.CallToolResult, error) { + tool, ok := o.tools[input.ToolName] + if !ok { + return mcp.NewToolResultError(fmt.Sprintf("tool not found: %s", input.ToolName)), nil + } + req := mcp.CallToolRequest{} + req.Params.Name = input.ToolName + req.Params.Arguments = input.Parameters + return tool.Handler(ctx, req) +} + +// recordingOptimizerFactory builds dispatchOptimizers and counts how many times it is +// invoked. The count is the double-indexing guard (AC6): on the Serve path the factory +// must be called exactly once per session (by the Serve layer), never also by the +// session-factory decorator. +type recordingOptimizerFactory struct { + calls atomic.Int32 +} + +func (f *recordingOptimizerFactory) build(_ context.Context, tools []server.ServerTool) (optimizer.Optimizer, error) { + f.calls.Add(1) + toolMap := make(map[string]server.ServerTool, len(tools)) + defs := make([]mcp.Tool, 0, len(tools)) + for _, t := range tools { + toolMap[t.Tool.Name] = t + defs = append(defs, t.Tool) + } + return &dispatchOptimizer{tools: toolMap, defs: defs}, nil +} + +var initBody = map[string]any{ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": map[string]any{ + "protocolVersion": "2025-06-18", + "capabilities": map[string]any{}, + "clientInfo": map[string]any{"name": "test", "version": "1.0"}, + }, +} + +// registerServeOptimizerSession builds a Serve server with the optimizer enabled +// (OptimizerFactory + AdvertiseFromCore) backed by fc, registers one anonymous session +// via the SDK initialize path, and returns the server, session ID, HTTP base URL, and +// the recording factory. Mirrors registerServeSession but in optimizer mode. +func registerServeOptimizerSession(t *testing.T, fc *fakeCore) (*Server, string, string, *recordingOptimizerFactory) { + t.Helper() + ctrl := gomock.NewController(t) + factory, _ := newToolSessionFactory(t, ctrl, fc.tools) + optFactory := &recordingOptimizerFactory{} + + srv, err := Serve(context.Background(), fc, &ServerConfig{ + SessionTTL: time.Minute, + SessionManagerConfig: &sessionmanager.FactoryConfig{ + Base: factory, + OptimizerFactory: optFactory.build, + AdvertiseFromCore: true, + }, + BackendRegistry: vmcp.NewImmutableRegistry([]vmcp.Backend{}), + }) + require.NoError(t, err) + t.Cleanup(func() { _ = srv.Stop(context.Background()) }) + + streamable := server.NewStreamableHTTPServer( + srv.mcpServer, + server.WithEndpointPath("/mcp"), + server.WithSessionIdManager(srv.vmcpSessionMgr), + ) + ts := httptest.NewServer(streamable) + t.Cleanup(ts.Close) + + initResp := postServeMCP(t, ts.URL, initBody, "") + defer initResp.Body.Close() + require.Equal(t, http.StatusOK, initResp.StatusCode) + sessionID := initResp.Header.Get("Mcp-Session-Id") + require.NotEmpty(t, sessionID) + require.Eventually(t, func() bool { _, ok := srv.vmcpSessionMgr.GetMultiSession(sessionID); return ok }, + 2*time.Second, 10*time.Millisecond, "session should be registered") + return srv, sessionID, ts.URL, optFactory +} + +// optimizerMetaHandlers returns the Serve-path find_tool/call_tool SDK handlers for a +// registered session, keyed by tool name, by invoking the same builder registration +// uses (serveSessionTools). +func optimizerMetaHandlers( + t *testing.T, srv *Server, sessionID string, +) map[string]server.ToolHandlerFunc { + t.Helper() + tools, err := srv.serveSessionTools(context.Background(), sessionID, nil) + require.NoError(t, err) + handlers := make(map[string]server.ToolHandlerFunc, len(tools)) + for _, tool := range tools { + handlers[tool.Tool.Name] = tool.Handler + } + return handlers +} + +// serveToolNames issues a tools/list against a Serve test server and returns the +// advertised tool names. Empty on a non-200 or undecodable response. +func serveToolNames(t *testing.T, baseURL, sessionID string) []string { + t.Helper() + resp := postServeMCP(t, baseURL, map[string]any{ + "jsonrpc": "2.0", + "id": 99, + "method": "tools/list", + "params": map[string]any{}, + }, sessionID) + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil + } + + var body struct { + Result struct { + Tools []struct { + Name string `json:"name"` + } `json:"tools"` + } `json:"result"` + } + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + return nil + } + names := make([]string, 0, len(body.Result.Tools)) + for _, tool := range body.Result.Tools { + names = append(names, tool.Name) + } + return names +} + +// TestServeOptimizerAdvertisesOnlyFindAndCallTool is the Serve-path counterpart to +// TestIntegration_SessionManagement_OptimizerMode: with the optimizer enabled, tools/list +// advertises exactly {find_tool, call_tool} and hides the raw core tools. It also proves +// AC6 (no double-indexing): the optimizer factory is invoked exactly once per session — +// by the Serve layer, not also by a session-factory decorator. +func TestServeOptimizerAdvertisesOnlyFindAndCallTool(t *testing.T) { + t.Parallel() + + fc := &fakeCore{tools: []vmcp.Tool{ + {Name: "tool-a", Description: "first"}, + {Name: "tool-b", Description: "second"}, + }} + _, sessionID, baseURL, optFactory := registerServeOptimizerSession(t, fc) + + require.Eventually(t, func() bool { + for _, n := range serveToolNames(t, baseURL, sessionID) { + if n == optimizerdec.FindToolName { + return true + } + } + return false + }, 2*time.Second, 20*time.Millisecond, "find_tool should appear once the optimizer tools are injected") + + names := serveToolNames(t, baseURL, sessionID) + assert.Contains(t, names, optimizerdec.FindToolName) + assert.Contains(t, names, optimizerdec.CallToolName) + assert.NotContains(t, names, "tool-a", "raw core tools must not be directly advertised in optimizer mode") + assert.NotContains(t, names, "tool-b") + assert.Len(t, names, 2, "only find_tool and call_tool should be advertised in optimizer mode") + + // AC6: the factory ran once (Serve layer), not twice (a decorator would double-index). + assert.Equal(t, int32(1), optFactory.calls.Load(), + "the optimizer factory must be invoked exactly once per session (no double-indexing)") + // The advertised set came from the core's single aggregation at registration. + assert.Equal(t, int32(1), fc.listToolsCalls.Load(), + "the optimizer must be built over the core's single registration-time aggregation") +} + +// TestServeOptimizerFindToolReturnsCoreTools proves find_tool searches the core's +// advertised set: the result carries the tools the optimizer was built over. +func TestServeOptimizerFindToolReturnsCoreTools(t *testing.T) { + t.Parallel() + + fc := &fakeCore{tools: []vmcp.Tool{{Name: "tool-a"}, {Name: "tool-b"}}} + srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + + handler := optimizerMetaHandlers(t, srv, sessionID)[optimizerdec.FindToolName] + require.NotNil(t, handler) + + req := mcp.CallToolRequest{Params: mcp.CallToolParams{ + Name: optimizerdec.FindToolName, + Arguments: map[string]any{"tool_description": "anything"}, + }} + res, err := handler(context.Background(), req) + require.NoError(t, err) + require.NotNil(t, res) + assert.False(t, res.IsError) + + body, err := json.Marshal(res) + require.NoError(t, err) + assert.Contains(t, string(body), "tool-a", "find_tool must return matches from the core's advertised set") + assert.Contains(t, string(body), "tool-b") +} + +// TestServeOptimizerCallToolRoutesThroughCore proves call_tool dispatches the inner +// invocation through core.CallTool with the REAL inner tool name — the path that closes +// the deferred inner-target admission gap (the inner name is what the core's admission +// seam authorizes). +func TestServeOptimizerCallToolRoutesThroughCore(t *testing.T) { + t.Parallel() + + fc := &fakeCore{tools: []vmcp.Tool{{Name: "tool-a"}}} + srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + + handler := optimizerMetaHandlers(t, srv, sessionID)[optimizerdec.CallToolName] + require.NotNil(t, handler) + + req := mcp.CallToolRequest{Params: mcp.CallToolParams{ + Name: optimizerdec.CallToolName, + Arguments: map[string]any{ + "tool_name": "tool-a", + "parameters": map[string]any{"k": "v"}, + }, + }} + res, err := handler(context.Background(), req) + require.NoError(t, err) + require.NotNil(t, res) + assert.False(t, res.IsError) + + require.Equal(t, int32(1), fc.callToolCalls.Load(), "call_tool must route the inner invocation through core.CallTool") + got, _ := fc.lastCallToolName.Load().(string) + assert.Equal(t, "tool-a", got, "the core must receive the REAL inner tool name, not call_tool") +} + +// TestServeOptimizerCallToolInnerAdmissionDenied proves the closed gap: when the core +// denies the inner target, call_tool returns a generic authorization message and never +// leaks the underlying authorizer detail. +func TestServeOptimizerCallToolInnerAdmissionDenied(t *testing.T) { + t.Parallel() + + fc := &fakeCore{ + tools: []vmcp.Tool{{Name: "tool-a"}}, + callErr: fmt.Errorf("%w: cedar said no", vmcp.ErrAuthorizationFailed), + } + srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + + handler := optimizerMetaHandlers(t, srv, sessionID)[optimizerdec.CallToolName] + require.NotNil(t, handler) + + req := mcp.CallToolRequest{Params: mcp.CallToolParams{ + Name: optimizerdec.CallToolName, + Arguments: map[string]any{"tool_name": "tool-a", "parameters": map[string]any{}}, + }} + res, err := handler(context.Background(), req) + require.NoError(t, err) + require.NotNil(t, res) + assert.True(t, res.IsError, "an inner-target denial must surface as an error result") + + body, _ := json.Marshal(res) + assert.Contains(t, string(body), "call denied by authorization policy") + assert.NotContains(t, string(body), "cedar said no", "the underlying authorizer detail must not leak") + assert.Equal(t, int32(1), fc.callToolCalls.Load(), "the inner target must reach the core admission seam") +} + +// TestServeOptimizerEnforcesSessionBinding proves both meta-tools enforce the session's +// identity binding (anti-hijack): an attacker presenting a token on an anonymous session +// is rejected, the session is terminated (fail-closed), and no optimizer work is done. +func TestServeOptimizerEnforcesSessionBinding(t *testing.T) { + t.Parallel() + + for _, toolName := range []string{optimizerdec.FindToolName, optimizerdec.CallToolName} { + t.Run(toolName, func(t *testing.T) { + t.Parallel() + fc := &fakeCore{tools: []vmcp.Tool{{Name: "tool-a"}}} + srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + handler := optimizerMetaHandlers(t, srv, sessionID)[toolName] + require.NotNil(t, handler) + + ctx := auth.WithIdentity(context.Background(), &auth.Identity{Token: "attacker-token"}) + req := mcp.CallToolRequest{Params: mcp.CallToolParams{ + Name: toolName, + Arguments: map[string]any{ + "tool_description": "x", + "tool_name": "tool-a", + "parameters": map[string]any{}, + }, + }} + res, err := handler(ctx, req) + require.NoError(t, err) + require.NotNil(t, res) + assert.True(t, res.IsError) + body, _ := json.Marshal(res) + assert.Contains(t, string(body), "Unauthorized") + + assert.Equal(t, int32(0), fc.callToolCalls.Load(), + "a binding failure must reject before any inner core call") + require.Eventually(t, func() bool { _, ok := srv.vmcpSessionMgr.GetMultiSession(sessionID); return !ok }, + 2*time.Second, 10*time.Millisecond, "a binding failure must terminate the session (fail-closed)") + }) + } +} + +// TestServeOptimizerLazyInjectsForRehydratedSession proves cross-pod re-injection (AC5): +// a fresh SDK session (as on a second pod, where OnRegisterSession never fired) gets +// find_tool/call_tool re-injected — not the raw core tools — rebuilt over a fresh +// core.ListTools. +func TestServeOptimizerLazyInjectsForRehydratedSession(t *testing.T) { + t.Parallel() + + fc := &fakeCore{tools: []vmcp.Tool{{Name: "tool-a"}}} + srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + + rehydrated := &fakeSDKSession{id: sessionID, tools: map[string]server.ServerTool{}} + srv.lazyInjectSessionTools(srv.mcpServer.WithContext(context.Background(), rehydrated)) + + assert.Contains(t, rehydrated.tools, optimizerdec.FindToolName, + "cross-pod re-injection must advertise find_tool") + assert.Contains(t, rehydrated.tools, optimizerdec.CallToolName, + "cross-pod re-injection must advertise call_tool") + assert.NotContains(t, rehydrated.tools, "tool-a", + "cross-pod re-injection must not advertise raw core tools in optimizer mode") +} diff --git a/pkg/vmcp/server/server.go b/pkg/vmcp/server/server.go index 96aa9c276d..e54e868696 100644 --- a/pkg/vmcp/server/server.go +++ b/pkg/vmcp/server/server.go @@ -220,6 +220,15 @@ type Server struct { // value is the branch selector throughout this file ("s.core == nil" == legacy). core core.VMCP + // optimizerFactory builds a per-session optimizer over the core's advertised + // tools. Set only on the Serve path when the optimizer is enabled (nil otherwise, + // including the entire legacy server.New path, which decorates the session factory + // instead). When non-nil, Serve-path session registration advertises find_tool/ + // call_tool in place of the raw core tools and dispatches call_tool's inner + // invocation through core.CallTool. The shared store and cleanup are owned by the + // session manager; this is the resolved factory surfaced via Manager.OptimizerFactory. + optimizerFactory func(context.Context, []server.ServerTool) (optimizer.Optimizer, error) + // MCP protocol server (mark3labs/mcp-go) mcpServer *server.MCPServer @@ -1161,8 +1170,10 @@ func (s *Server) lazyInjectSessionTools(ctx context.Context) { // Re-derive the tool set the same way registration did, so cross-pod re-injection // matches the advertised set. On the Serve path (s.core != nil) that means a fresh // core.ListTools for the request identity (the core is stateless, so re-deriving on - // pod B is the cache-miss equivalent of the once-per-session registration call); on - // the legacy path it reads the factory-built session's tools via GetAdaptedTools. + // pod B is the cache-miss equivalent of the once-per-session registration call), + // optimizer-wrapped into find_tool/call_tool when the optimizer is enabled — both + // via serveSessionTools, the same helper registration uses; on the legacy path it + // reads the factory-built session's tools via GetAdaptedTools. // // Note: on the Serve path this lists under the CURRENT request identity, not the // session's bound identity. For the realistic same-principal load-balanced case they @@ -1173,7 +1184,7 @@ func (s *Server) lazyInjectSessionTools(ctx context.Context) { adaptedTools, err := func() ([]server.ServerTool, error) { if s.core != nil { identity, _ := auth.IdentityFromContext(ctx) - return s.coreSessionTools(ctx, sessionID, identity) + return s.serveSessionTools(ctx, sessionID, identity) } return s.vmcpSessionMgr.GetAdaptedTools(sessionID) }() diff --git a/pkg/vmcp/server/sessionmanager/factory.go b/pkg/vmcp/server/sessionmanager/factory.go index 8adf30ed44..d4e2e6897e 100644 --- a/pkg/vmcp/server/sessionmanager/factory.go +++ b/pkg/vmcp/server/sessionmanager/factory.go @@ -74,6 +74,18 @@ type FactoryConfig struct { // 0 uses defaultCacheCapacity (1000). Negative values are rejected by // sessionmanager.New. CacheCapacity int + + // AdvertiseFromCore signals that the advertised capability set is sourced from + // the core (the Serve path), not from this factory's per-session aggregation. + // When true, New still resolves the optimizer factory (owning its store and + // cleanup) and exposes it via Manager.OptimizerFactory so the Serve layer can + // build a per-session optimizer over the core's tools — but it does NOT install + // the optimizer decorator on the session factory. Installing it would upsert a + // second, divergent tool set (the factory's raw, un-aggregated tools) into the + // shared FTS5 store alongside the core-sourced set, i.e. double-index the store. + // Has no effect when the optimizer is disabled. The legacy server.New path leaves + // this false, so its optimizer decorator is unchanged. + AdvertiseFromCore bool } // resolveOptimizer wires the optimizer factory from cfg, applying telemetry @@ -141,7 +153,10 @@ func buildDecoratingFactory( if len(cfg.WorkflowDefs) > 0 { decorators = append(decorators, compositeToolsDecorator(cfg.WorkflowDefs, cfg.ComposerFactory, instruments)) } - if optimizerFactory != nil { + // On the Serve path (AdvertiseFromCore) the optimizer is built by the Serve layer + // over the core's advertised set, so the factory's optimizer decorator is skipped + // to avoid double-indexing the shared store (see FactoryConfig.AdvertiseFromCore). + if optimizerFactory != nil && !cfg.AdvertiseFromCore { decorators = append(decorators, optimizerDecoratorFn(optimizerFactory, terminateSession)) } diff --git a/pkg/vmcp/server/sessionmanager/session_manager.go b/pkg/vmcp/server/sessionmanager/session_manager.go index 1164a8dbc2..a61d3408f6 100644 --- a/pkg/vmcp/server/sessionmanager/session_manager.go +++ b/pkg/vmcp/server/sessionmanager/session_manager.go @@ -30,6 +30,7 @@ import ( transportsession "github.com/stacklok/toolhive/pkg/transport/session" "github.com/stacklok/toolhive/pkg/vmcp" "github.com/stacklok/toolhive/pkg/vmcp/conversion" + "github.com/stacklok/toolhive/pkg/vmcp/optimizer" vmcpsession "github.com/stacklok/toolhive/pkg/vmcp/session" sessiontypes "github.com/stacklok/toolhive/pkg/vmcp/session/types" ) @@ -85,6 +86,12 @@ type Manager struct { // session from stored metadata; on a cache hit it confirms liveness via // storage.Load, which also refreshes the Redis TTL. sessions *cache.ValidatingCache[string, vmcpsession.MultiSession] + + // optimizerFactory is the resolved (telemetry-wrapped) optimizer factory, or + // nil when the optimizer is disabled. Surfaced via OptimizerFactory so the Serve + // path can build a per-session optimizer over the core's tools. The store and + // cleanup remain owned by this Manager (cleanup returned from New). + optimizerFactory func(context.Context, []mcpserver.ServerTool) (optimizer.Optimizer, error) } // New creates a Manager backed by the given SessionDataStorage and backend @@ -138,8 +145,9 @@ func New( // Build the Manager first so we can reference sm.Terminate and sm.sessions // directly in closures, eliminating the forward-reference variable pattern. sm := &Manager{ - storage: storage, - backendReg: backendRegistry, + storage: storage, + backendReg: backendRegistry, + optimizerFactory: optimizerFactory, } sm.sessions = cache.New( @@ -164,6 +172,19 @@ func New( return sm, cleanup, nil } +// OptimizerFactory returns the resolved (telemetry-wrapped) optimizer factory, or +// nil when the optimizer is disabled. +// +// It is consumed by the Serve path (FactoryConfig.AdvertiseFromCore), which builds +// a per-session optimizer over the core's advertised tool set rather than via the +// session decorator. The optimizer's shared store and its cleanup remain owned by +// this Manager (the cleanup function returned from New). On the legacy server.New +// path the factory is applied internally via the session decorator and this getter +// is unused. +func (m *Manager) OptimizerFactory() func(context.Context, []mcpserver.ServerTool) (optimizer.Optimizer, error) { + return m.optimizerFactory +} + // generateTimeout is the context deadline applied to the storage operations // inside Generate(). It provides a safety net in addition to the go-redis // client-level read/write timeouts. diff --git a/pkg/vmcp/session/optimizerdec/decorator.go b/pkg/vmcp/session/optimizerdec/decorator.go index 12d3fe5e96..b3e4454e88 100644 --- a/pkg/vmcp/session/optimizerdec/decorator.go +++ b/pkg/vmcp/session/optimizerdec/decorator.go @@ -54,31 +54,41 @@ type optimizerDecorator struct { // which routes through the instrumented optimizer (telemetry, traces, metrics). func NewDecorator(sess sessiontypes.MultiSession, opt optimizer.Optimizer) sessiontypes.MultiSession { return &optimizerDecorator{ - MultiSession: sess, - opt: opt, - optimizerTools: []vmcp.Tool{ - { - Name: FindToolName, - Description: "Find and return tools that can help accomplish the user's request. " + - "This searches available MCP server tools using semantic and keyword-based matching. " + - "Use this function when you need to: " + - "(1) discover what tools are available for a specific task, " + - "(2) find the right tool(s) before attempting to solve a problem, " + - "(3) check if required functionality exists in the current environment. " + - "Returns matching tools ranked by relevance including their names, descriptions, " + - "required parameters and schemas, plus token efficiency metrics showing " + - "baseline_tokens, returned_tokens, and savings_percent. " + - "Always call this before call_tool to discover the correct tool name and parameter schema.", - InputSchema: findToolInputSchema, - }, - { - Name: CallToolName, - Description: "Execute a specific tool with the provided parameters. " + - "Use this function to run a tool after identifying it with find_tool. " + - "Important: always use find_tool first to get the correct tool_name " + - "and parameter schema before calling this function.", - InputSchema: callToolInputSchema, - }, + MultiSession: sess, + opt: opt, + optimizerTools: OptimizerTools(), + } +} + +// OptimizerTools returns the find_tool and call_tool meta-tool definitions (name, +// description, input schema) that replace the full backend tool list in optimizer +// mode. The definitions are shared so that the legacy MultiSession decorator (this +// package) and the Serve-path optimizer wiring (pkg/vmcp/server) advertise an +// identical pair; each consumer wires its own handlers around these definitions. +// A fresh slice is returned on every call so callers cannot mutate shared state. +func OptimizerTools() []vmcp.Tool { + return []vmcp.Tool{ + { + Name: FindToolName, + Description: "Find and return tools that can help accomplish the user's request. " + + "This searches available MCP server tools using semantic and keyword-based matching. " + + "Use this function when you need to: " + + "(1) discover what tools are available for a specific task, " + + "(2) find the right tool(s) before attempting to solve a problem, " + + "(3) check if required functionality exists in the current environment. " + + "Returns matching tools ranked by relevance including their names, descriptions, " + + "required parameters and schemas, plus token efficiency metrics showing " + + "baseline_tokens, returned_tokens, and savings_percent. " + + "Always call this before call_tool to discover the correct tool name and parameter schema.", + InputSchema: findToolInputSchema, + }, + { + Name: CallToolName, + Description: "Execute a specific tool with the provided parameters. " + + "Use this function to run a tool after identifying it with find_tool. " + + "Important: always use find_tool first to get the correct tool_name " + + "and parameter schema before calling this function.", + InputSchema: callToolInputSchema, }, } } From 0b584b80d7a7e9515f07dd944481cac50f0ba86d Mon Sep 17 00:00:00 2001 From: Trey Date: Wed, 17 Jun 2026 07:18:06 -0700 Subject: [PATCH 2/4] Guard nil find_tool output and document optimizer re-index cost Addresses stacklok/toolhive#5543 review comments: - MEDIUM serve_optimizer.go (3428801576): add the output == nil guard to optimizerFindToolHandler for parity with the legacy/sibling handlers, so a nil result cannot marshal to "null" and surface as a success - LOW serve_optimizer.go (3428801603): document that optimizerSessionTools re-upserts on every registration and cross-pod rehydration (idempotent by PK, repeated work not a leak; optimization deferred to #5445) --- pkg/vmcp/server/serve_optimizer.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/vmcp/server/serve_optimizer.go b/pkg/vmcp/server/serve_optimizer.go index 07e9d386a6..ea14874d10 100644 --- a/pkg/vmcp/server/serve_optimizer.go +++ b/pkg/vmcp/server/serve_optimizer.go @@ -66,6 +66,13 @@ func (s *Server) serveSessionTools( func (s *Server) optimizerSessionTools( ctx context.Context, sessionID string, coreTools []server.ServerTool, ) ([]server.ServerTool, error) { + // This runs once per registration AND once per cross-pod lazyInjectSessionTools + // rehydration; each build re-upserts coreTools into the shared store (and, when + // embeddings are configured, an embedding round-trip per tool). Rows do not + // accumulate — the store upserts by tool-name PK (INSERT OR REPLACE) — so this is + // repeated work, not a leak. Acceptable while the Serve path is test-only; + // skipping the re-upsert on rehydration is a deferred optimization (tracked for + // #5445), not done now to avoid premature optimization without measured evidence. opt, err := s.optimizerFactory(ctx, coreTools) if err != nil { return nil, fmt.Errorf("build session optimizer: %w", err) @@ -140,6 +147,12 @@ func (s *Server) optimizerFindToolHandler(sessionID string, opt optimizer.Optimi if err != nil { return mcp.NewToolResultError(fmt.Sprintf("find_tool failed: %v", err)), nil } + // Defensive parity with the legacy optimizerdec handler (and the sibling + // call_tool handler): the production optimizer never returns (nil, nil), but + // guard so a nil output cannot marshal to "null" and surface as a success. + if output == nil { + return mcp.NewToolResultError("find_tool: optimizer returned nil result"), nil + } jsonBytes, err := json.Marshal(output) if err != nil { From 9412c3be9a58524a5ee09f7c7dea35c53427a932 Mon Sep 17 00:00:00 2001 From: Trey Date: Wed, 17 Jun 2026 07:19:28 -0700 Subject: [PATCH 3/4] Strengthen Serve optimizer tests Addresses stacklok/toolhive#5543 review comments: - LOW serve_optimizer_test.go (3428801613): assert find_tool returns exactly the core's advertised tool names by decoding into FindToolOutput, instead of substring-matching the whole marshalled body; note ranking is out of scope - LOW serve_optimizer.go (3428801607): add a test that optimizerToolHandler rejects an unknown meta-tool name, locking in the defensive default branch - LOW serve_optimizer_test.go (3428801618): assert the cross-pod rehydration re-aggregates via a fresh core.ListTools and rebuilds the optimizer (the half of AC5 the test did not previously prove) --- pkg/vmcp/server/serve_optimizer_test.go | 47 ++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/pkg/vmcp/server/serve_optimizer_test.go b/pkg/vmcp/server/serve_optimizer_test.go index 5225d54492..62a65cb100 100644 --- a/pkg/vmcp/server/serve_optimizer_test.go +++ b/pkg/vmcp/server/serve_optimizer_test.go @@ -237,10 +237,36 @@ func TestServeOptimizerFindToolReturnsCoreTools(t *testing.T) { require.NotNil(t, res) assert.False(t, res.IsError) - body, err := json.Marshal(res) - require.NoError(t, err) - assert.Contains(t, string(body), "tool-a", "find_tool must return matches from the core's advertised set") - assert.Contains(t, string(body), "tool-b") + // Decode the result text into the typed output and assert the matched tool names + // exactly, rather than substring-matching the whole body (which could pass on an + // unrelated field). Real search/ranking is out of scope per the issue — the test + // optimizer returns the whole advertised set — so this asserts find_tool surfaces + // the core's set, not the relevance ordering. + require.Len(t, res.Content, 1) + text, ok := res.Content[0].(mcp.TextContent) + require.True(t, ok, "find_tool result content must be text") + var out optimizer.FindToolOutput + require.NoError(t, json.Unmarshal([]byte(text.Text), &out)) + names := make([]string, 0, len(out.Tools)) + for _, tl := range out.Tools { + names = append(names, tl.Name) + } + assert.ElementsMatch(t, []string{"tool-a", "tool-b"}, names, + "find_tool must return exactly the core's advertised set") +} + +// TestServeOptimizerToolHandlerRejectsUnknownMetaTool locks in the defensive default +// branch of optimizerToolHandler: a definition advertised by OptimizerTools() without a +// wired handler must fail at registration (a non-nil error), not silently produce a nil +// handler that would only blow up at call time. +func TestServeOptimizerToolHandlerRejectsUnknownMetaTool(t *testing.T) { + t.Parallel() + + srv := &Server{} + handler, err := srv.optimizerToolHandler("sess", "bogus", &dispatchOptimizer{}) + require.Error(t, err) + assert.Nil(t, handler) + assert.ErrorContains(t, err, "bogus") } // TestServeOptimizerCallToolRoutesThroughCore proves call_tool dispatches the inner @@ -349,7 +375,13 @@ func TestServeOptimizerLazyInjectsForRehydratedSession(t *testing.T) { t.Parallel() fc := &fakeCore{tools: []vmcp.Tool{{Name: "tool-a"}}} - srv, sessionID, _, _ := registerServeOptimizerSession(t, fc) + srv, sessionID, _, optFactory := registerServeOptimizerSession(t, fc) + + // Capture the registration-time counts so we can prove the rehydration REBUILDS + // (the half of AC5 that distinguishes the Serve path from the legacy GetAdaptedTools): + // a fresh core.ListTools aggregation feeds a freshly built optimizer. + listBefore := fc.listToolsCalls.Load() + buildsBefore := optFactory.calls.Load() rehydrated := &fakeSDKSession{id: sessionID, tools: map[string]server.ServerTool{}} srv.lazyInjectSessionTools(srv.mcpServer.WithContext(context.Background(), rehydrated)) @@ -360,4 +392,9 @@ func TestServeOptimizerLazyInjectsForRehydratedSession(t *testing.T) { "cross-pod re-injection must advertise call_tool") assert.NotContains(t, rehydrated.tools, "tool-a", "cross-pod re-injection must not advertise raw core tools in optimizer mode") + + assert.Equal(t, listBefore+1, fc.listToolsCalls.Load(), + "re-injection must re-derive from a fresh core.ListTools, not a cached set") + assert.Equal(t, buildsBefore+1, optFactory.calls.Load(), + "re-injection must rebuild the optimizer over the freshly aggregated set") } From 11bd32dae017118959e0c70c031b0e39788fcc25 Mon Sep 17 00:00:00 2001 From: Trey Date: Wed, 17 Jun 2026 07:22:15 -0700 Subject: [PATCH 4/4] Enforce the AdvertiseFromCore double-index contract Addresses stacklok/toolhive#5543 review comments: - MEDIUM factory.go (3428801597): make AC6 structural instead of an unenforced contract. Manager surfaces the optimizer factory via OptimizerFactory() only when AdvertiseFromCore is true, so the session-factory decorator (used iff !AdvertiseFromCore) and the Serve-layer getter become mutually exclusive writers of the shared FTS5 store. A Serve composition root that enables the optimizer but forgets the flag now gets a nil factory (no Serve-layer optimizer) rather than a silent double-index. Adds a test locking in the gate in both directions. --- pkg/vmcp/server/sessionmanager/factory.go | 21 ++++--- .../sessionmanager/optimizer_gate_test.go | 62 +++++++++++++++++++ .../server/sessionmanager/session_manager.go | 28 ++++++--- 3 files changed, 95 insertions(+), 16 deletions(-) create mode 100644 pkg/vmcp/server/sessionmanager/optimizer_gate_test.go diff --git a/pkg/vmcp/server/sessionmanager/factory.go b/pkg/vmcp/server/sessionmanager/factory.go index d4e2e6897e..1bd2c25f1d 100644 --- a/pkg/vmcp/server/sessionmanager/factory.go +++ b/pkg/vmcp/server/sessionmanager/factory.go @@ -77,14 +77,19 @@ type FactoryConfig struct { // AdvertiseFromCore signals that the advertised capability set is sourced from // the core (the Serve path), not from this factory's per-session aggregation. - // When true, New still resolves the optimizer factory (owning its store and - // cleanup) and exposes it via Manager.OptimizerFactory so the Serve layer can - // build a per-session optimizer over the core's tools — but it does NOT install - // the optimizer decorator on the session factory. Installing it would upsert a - // second, divergent tool set (the factory's raw, un-aggregated tools) into the - // shared FTS5 store alongside the core-sourced set, i.e. double-index the store. - // Has no effect when the optimizer is disabled. The legacy server.New path leaves - // this false, so its optimizer decorator is unchanged. + // + // It is the single switch that selects WHICH layer indexes the shared FTS5 store, + // so the two can never both do it (the AC6 double-index): + // - true: New does NOT install the optimizer decorator on the session factory, + // and exposes the resolved factory via Manager.OptimizerFactory so the + // Serve layer builds a per-session optimizer over the core's tools. + // - false: New installs the optimizer decorator (legacy behavior) and + // Manager.OptimizerFactory returns nil, so a Serve composition root that + // enables the optimizer but forgets this flag gets no Serve-layer + // optimizer rather than a second, divergent upsert into the store. + // Either way New resolves the optimizer factory and owns its store/cleanup. Has no + // effect when the optimizer is disabled. The legacy server.New path leaves this + // false, so its optimizer decorator is unchanged. AdvertiseFromCore bool } diff --git a/pkg/vmcp/server/sessionmanager/optimizer_gate_test.go b/pkg/vmcp/server/sessionmanager/optimizer_gate_test.go new file mode 100644 index 0000000000..25bf9c1c8d --- /dev/null +++ b/pkg/vmcp/server/sessionmanager/optimizer_gate_test.go @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package sessionmanager + +import ( + "context" + "testing" + + mcpserver "github.com/mark3labs/mcp-go/server" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + + "github.com/stacklok/toolhive/pkg/vmcp/optimizer" +) + +// TestOptimizerFactoryGatedOnAdvertiseFromCore locks in the AC6 double-index +// guarantee: New surfaces the optimizer factory via OptimizerFactory() ONLY when +// AdvertiseFromCore is true. That makes the session-factory decorator (installed iff +// !AdvertiseFromCore) and the Serve-layer getter mutually exclusive store writers, so a +// Serve composition root that enables the optimizer but forgets the flag gets a nil +// factory (no Serve-layer optimizer) instead of a silent second upsert into the store. +func TestOptimizerFactoryGatedOnAdvertiseFromCore(t *testing.T) { + t.Parallel() + + optFactory := func(context.Context, []mcpserver.ServerTool) (optimizer.Optimizer, error) { + return nil, nil + } + + tests := []struct { + name string + advertiseFromCore bool + wantSurfaced bool + }{ + {"surfaced to Serve when advertising from core", true, true}, + {"not surfaced on the legacy path (decorator owns it)", false, false}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + factory := newMockFactory(t, ctrl, newMockSession(t, ctrl, "", nil)) + + sm, cleanup, err := New(newTestSessionDataStorage(t), &FactoryConfig{ + Base: factory, + OptimizerFactory: optFactory, + AdvertiseFromCore: tc.advertiseFromCore, + }, newFakeRegistry()) + require.NoError(t, err) + t.Cleanup(func() { _ = cleanup(context.Background()) }) + + if tc.wantSurfaced { + assert.NotNil(t, sm.OptimizerFactory(), + "the factory must be surfaced to the Serve layer when AdvertiseFromCore is set") + } else { + assert.Nil(t, sm.OptimizerFactory(), + "the factory must NOT be surfaced when AdvertiseFromCore is false (decorator owns the store)") + } + }) + } +} diff --git a/pkg/vmcp/server/sessionmanager/session_manager.go b/pkg/vmcp/server/sessionmanager/session_manager.go index a61d3408f6..54076d047e 100644 --- a/pkg/vmcp/server/sessionmanager/session_manager.go +++ b/pkg/vmcp/server/sessionmanager/session_manager.go @@ -145,9 +145,20 @@ func New( // Build the Manager first so we can reference sm.Terminate and sm.sessions // directly in closures, eliminating the forward-reference variable pattern. sm := &Manager{ - storage: storage, - backendReg: backendRegistry, - optimizerFactory: optimizerFactory, + storage: storage, + backendReg: backendRegistry, + } + + // Surface the resolved optimizer factory to the Serve path ONLY when + // AdvertiseFromCore is set. That makes the two store writers mutually exclusive: + // the session-factory decorator runs iff !AdvertiseFromCore (buildDecoratingFactory + // below), and OptimizerFactory() returns a non-nil factory iff AdvertiseFromCore — + // so a Serve composition root that enables the optimizer but forgets the flag gets a + // nil factory (no Serve-layer optimizer) rather than a silent double-index of the + // shared FTS5 store. The legacy server.New path leaves AdvertiseFromCore false and + // never calls OptimizerFactory(), so its decorator is unaffected. + if cfg.AdvertiseFromCore { + sm.optimizerFactory = optimizerFactory } sm.sessions = cache.New( @@ -173,14 +184,15 @@ func New( } // OptimizerFactory returns the resolved (telemetry-wrapped) optimizer factory, or -// nil when the optimizer is disabled. +// nil when the optimizer is disabled OR FactoryConfig.AdvertiseFromCore is false. // // It is consumed by the Serve path (FactoryConfig.AdvertiseFromCore), which builds // a per-session optimizer over the core's advertised tool set rather than via the -// session decorator. The optimizer's shared store and its cleanup remain owned by -// this Manager (the cleanup function returned from New). On the legacy server.New -// path the factory is applied internally via the session decorator and this getter -// is unused. +// session decorator. Gating on AdvertiseFromCore makes the decorator and this getter +// mutually exclusive store writers, so the shared FTS5 store can never be double-indexed +// (see New). The optimizer's shared store and its cleanup remain owned by this Manager +// (the cleanup function returned from New). On the legacy server.New path the factory is +// applied internally via the session decorator and this getter is unused. func (m *Manager) OptimizerFactory() func(context.Context, []mcpserver.ServerTool) (optimizer.Optimizer, error) { return m.optimizerFactory }