Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .changeset/mcp-search-denoise.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
'@hyperdx/api': patch
'@hyperdx/app': patch
'@hyperdx/common-utils': patch
---

feat(mcp): add denoise option to clickstack_search tool

Add a `denoise` boolean parameter to the MCP `clickstack_search` tool that
automatically filters out high-frequency repetitive event patterns from
search results, mirroring the web app's "Denoise Results" feature.

When enabled, the tool samples 10k random events, mines patterns using
the Drain algorithm, identifies noisy patterns (>10% of sample), and
filters them out of result rows. Returns filtered rows plus metadata
listing removed patterns with estimated counts.

Extracts shared denoise constants (`DENOISE_SAMPLE_SIZE`,
`DENOISE_NOISE_THRESHOLD`) into `@hyperdx/common-utils` so the web app
and MCP server use the same values.
104 changes: 104 additions & 0 deletions packages/api/src/mcp/__tests__/queryTool.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,110 @@ describe('MCP Query Tools', () => {
expect(result.isError).toBe(true);
expect(getFirstText(result)).toMatch(/sourceId/i);
});

// Schema contract: the search tool must advertise `denoise` as an input property.
it('should expose denoise property in schema', async () => {
  const listing = await client.listTools();
  const searchTool = listing.tools.find(
    candidate => candidate.name === 'clickstack_search',
  );
  expect(searchTool).toBeDefined();

  // A missing `properties` map is treated as "no properties" rather than a crash.
  const schemaProps = searchTool!.inputSchema.properties ?? {};
  expect(Object.keys(schemaProps)).toContain('denoise');
});

// Smoke test: turning `denoise` on for a search that is expected to match no
// rows must not produce an error, and the response must still carry `result`.
it('should not error when denoise=true on empty results', async () => {
  // Read the clock once so both window bounds derive from the same instant.
  const nowMs = Date.now();
  const result = await callTool(client, 'clickstack_search', {
    sourceId: logSource._id.toString(),
    denoise: true,
    startTime: new Date(nowMs - 60 * 60 * 1000).toISOString(),
    endTime: new Date(nowMs).toISOString(),
  });

  expect(result.isError).toBeFalsy();
  // With no data, the denoised block is not expected to appear because the
  // search result itself has no rows to process (early return path). Only the
  // presence of `result` is asserted here.
  // NOTE(review): the empty-result response shape is not visible from this
  // test — confirm it, then pin it with an explicit assertion on `denoised`.
  const output = JSON.parse(getFirstText(result));
  expect(output).toHaveProperty('result');
});

describe('denoise with seeded data', () => {
  // Reference instant captured when the suite is defined; every seeded
  // timestamp and both search-window bounds are derived from it.
  const anchorMs = new Date().getTime();
  const seedStartMs = anchorMs - 5 * 60 * 1000;

  // Search window: ten minutes before the anchor to one minute after it.
  const windowStart = new Date(anchorMs - 10 * 60 * 1000).toISOString();
  const windowEnd = new Date(anchorMs + 60 * 1000).toISOString();

  // Builds the tool arguments lazily so `logSource` is read at test time.
  const searchArgs = (denoise: boolean) => ({
    sourceId: logSource._id.toString(),
    denoise,
    maxResults: 200,
    startTime: windowStart,
    endTime: windowEnd,
  });

  beforeEach(async () => {
    const seedRows: Parameters<typeof bulkInsertLogs>[0] = [];

    // Noisy pattern: "Health check OK from <ip>" — 80 rows (>10% threshold),
    // spaced 100 ms apart.
    Array.from({ length: 80 }).forEach((_, idx) => {
      const thirdOctet = Math.floor(idx / 256);
      const fourthOctet = idx % 256;
      seedRows.push({
        Body: `Health check OK from 10.0.${thirdOctet}.${fourthOctet}`,
        ServiceName: 'loadbalancer',
        SeverityText: 'INFO',
        Timestamp: new Date(seedStartMs + idx * 100),
      });
    });

    // Unique/rare events — 5 rows (well below 10% threshold), each with a
    // distinct letter A–E and spaced one second apart.
    Array.from({ length: 5 }).forEach((_, idx) => {
      const letter = String.fromCharCode(65 + idx);
      seedRows.push({
        Body: `Rare event type ${letter} occurred in subsystem`,
        ServiceName: 'worker',
        SeverityText: 'WARN',
        Timestamp: new Date(seedStartMs + (80 + idx) * 1000),
      });
    });

    await bulkInsertLogs(seedRows);
  });

  it('should filter noisy patterns and emit denoised metadata', async () => {
    const result = await callTool(client, 'clickstack_search', searchArgs(true));

    expect(result.isError).toBeFalsy();
    const output = JSON.parse(getFirstText(result));

    // The response must carry a denoised block with its metadata fields.
    expect(output).toHaveProperty('denoised');
    const { denoised } = output;
    expect(denoised).toHaveProperty('removedPatterns');
    expect(denoised).toHaveProperty('returnedRowCountBeforeDenoise');
    expect(denoised).toHaveProperty('filteredRowCount');

    // Denoising actually ran, so no skip reason is reported.
    expect(denoised.skipped).toBeUndefined();

    // The health-check pattern must be among the removed patterns.
    expect(denoised.removedPatterns.length).toBeGreaterThanOrEqual(1);
    const healthPattern = denoised.removedPatterns.find(
      (entry: { pattern: string }) => entry.pattern.includes('Health check'),
    );
    expect(healthPattern).toBeDefined();

    // Fewer rows remain after filtering than were returned before it.
    expect(denoised.filteredRowCount).toBeLessThan(
      denoised.returnedRowCountBeforeDenoise,
    );
  });

  it('should return results without denoised block when denoise=false', async () => {
    const result = await callTool(client, 'clickstack_search', searchArgs(false));

    expect(result.isError).toBeFalsy();
    const output = JSON.parse(getFirstText(result));
    expect(output).not.toHaveProperty('denoised');
  });
});
});

// ─── clickstack_event_patterns ─────────────────────────────────────────────────
Expand Down
Loading
Loading